diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index e1c29f22..9a33d276 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -3715,6 +3715,26 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, "compression is tile-based; the strip layout is not " "implemented on the GPU path. Use to_geotiff(..., gpu=False, " "tiled=False) for strip output on CPU.") + # MinIsWhite pre-inversion (issue #1836) runs in the eager CPU writer. + # The GPU writer assembles tile bytes directly on device; threading + # the pixel + nodata-sentinel transform through that pipeline is out + # of scope for the round-trip fix. Refuse the combination so callers + # do not silently get inverted on-disk values. Move the array to the + # CPU and call the eager ``write`` path for MinIsWhite output. + from ._writer import _resolve_photometric as _resolve_photo_gpu + _gpu_samples_hint = (data.shape[2] if hasattr(data, 'shape') + and data.ndim == 3 else 1) + _gpu_resolved_photo, _ = _resolve_photo_gpu( + photometric, _gpu_samples_hint) + if _gpu_resolved_photo == 0 and _gpu_samples_hint == 1: + raise NotImplementedError( + "photometric='miniswhite' on the GPU writer is not " + "supported: the writer-side pixel inversion that mirrors " + "the reader's unconditional MinIsWhite inversion (issue " + "#1836) is only wired into the eager CPU ``write`` path. " + "Move the array to host memory and call to_geotiff with " + "gpu=False, or write with photometric='minisblack' / " + "'auto'.") # write_geotiff_gpu is always tiled, so validate tile_size here and # keep parity with the public to_geotiff entry point. _validate_tile_size_arg(tile_size) diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index 4a0c2626..2216d612 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -100,6 +100,64 @@ } +def _invert_nodata_for_miniswhite(nodata, dtype: np.dtype): + """Invert a nodata sentinel for MinIsWhite writes. + + The reader's mask path (see ``_reader._miniswhite_inverted_nodata``) + treats the stored sentinel as living in the on-disk display domain, + so the writer pre-inverts the user-supplied sentinel alongside the + pixels. After the writer pre-inversion, the on-disk sentinel byte + matches the on-disk pixel byte that represents "missing", and the + reader inverts both back to the user domain to drive the NaN mask. + Returns ``nodata`` unchanged for signed integer pixels, NaN + sentinels, and unsigned sentinels that are out-of-range or + fractional or non-finite -- matching the reader exactly. ``+/-inf`` + on a float sentinel is negated (the reader does the same). See + issue #1836. + """ + if nodata is None: + return nodata + if dtype.kind == 'u': + if not np.isfinite(nodata): + return nodata + if not float(nodata).is_integer(): + return nodata + vi = int(nodata) + info = np.iinfo(dtype) + if not (info.min <= vi <= info.max): + return nodata + return info.max - vi + if dtype.kind == 'f': + if np.isnan(nodata): + return nodata + return -float(nodata) + return nodata + + +def _apply_photometric_miniswhite_invert( + arr: np.ndarray, resolved_photometric: int, samples_per_pixel: int, +) -> np.ndarray: + """Mirror the reader's MinIsWhite inversion on the writer side. + + The reader unconditionally inverts single-band ``photometric == 0`` + data via ``_reader._apply_photometric_miniswhite``. Without a + matching writer-side inversion, ``to_geotiff(da, photometric= + 'miniswhite')`` silently corrupts pixel values on the round trip. + See issue #1836. + + Returns the pre-inverted array (a new array) so that the reader's + inversion restores the original values. Multi-band data and signed + integer data pass through unchanged, matching the reader. + """ + if resolved_photometric != 0 or samples_per_pixel != 1: + return arr + if arr.dtype.kind == 'u': + return np.iinfo(arr.dtype).max - arr + if arr.dtype.kind == 'f': + return -arr + return arr + + def _resolve_photometric(photometric, samples_per_pixel: int): """Resolve the ``photometric`` writer kwarg to a TIFF photometric int and the matching ExtraSamples list. @@ -1480,6 +1538,54 @@ def write(data: np.ndarray, path: str, *, raise ValueError( f"JPEG compression requires 1 or 3 bands, got {samples}") + # MinIsWhite (photometric=0) requires a writer-side inversion to mirror + # the reader's unconditional inversion of single-band MinIsWhite data + # (see _reader._apply_photometric_miniswhite). Without this, written + # values do not round-trip. The nodata sentinel is inverted alongside + # the pixels so that the on-disk sentinel byte matches the on-disk + # pixel byte that means "missing" -- the reader's existing mask logic + # (issue #1809) then identifies the correct positions and rewrites + # them to NaN. Issue #1836. + _samples = data.shape[2] if data.ndim == 3 else 1 + _resolved_photo, _ = _resolve_photometric(photometric, _samples) + # An ``extra_tags`` entry with TAG_PHOTOMETRIC silently overrides the + # kwarg-resolved value when the IFD is written (issue #1769). The + # pre-inversion decision below would otherwise transform pixels for + # one photometric value while the on-disk tag advertises a different + # one. Refuse the combination so callers do not end up with corrupt + # round-trips through the override path. + _extra_tags_photo = None + if extra_tags is not None: + for _et in extra_tags: + if _et[0] == TAG_PHOTOMETRIC: + _extra_tags_photo = int(_et[3]) + break + if (_extra_tags_photo is not None + and _extra_tags_photo != _resolved_photo + and (_extra_tags_photo == 0 or _resolved_photo == 0) + and _samples == 1): + raise ValueError( + f"extra_tags TAG_PHOTOMETRIC override ({_extra_tags_photo}) " + f"disagrees with photometric={photometric!r} for a " + f"single-band raster where MinIsWhite (photometric=0) " + f"requires writer-side pixel inversion. The override would " + f"either pre-invert pixels for a non-MinIsWhite tag or skip " + f"inversion for a MinIsWhite tag. Pass photometric= directly " + f"instead, or drop the override.") + if _resolved_photo == 0 and _samples == 1: + if cog or overview_levels is not None: + raise NotImplementedError( + "photometric='miniswhite' is not supported with " + "cog=True or explicit overview_levels: overview reducers " + "('min', 'max', 'mode', ...) do not commute with the " + "pixel inversion, so summary statistics would not match " + "the user-domain values. Write without overviews, or " + "use photometric='minisblack' / 'auto'.") + data = _apply_photometric_miniswhite_invert( + data, _resolved_photo, _samples) + if nodata is not None: + nodata = _invert_nodata_for_miniswhite(nodata, data.dtype) + # Build pixel data parts parts = [] @@ -1758,6 +1864,23 @@ def write_streaming(dask_data, path: str, *, samples = dask_data.shape[2] if dask_data.ndim == 3 else 1 dtype = dask_data.dtype + # MinIsWhite pre-inversion (issue #1836) runs per-array in the eager + # ``write`` path. The streaming dask path materialises one tile-row + # at a time, so applying the inversion correctly would require + # threading the transform through every per-tile segment. That + # plumbing is out of scope for the round-trip fix; refuse the + # combination so callers do not silently get inverted on-disk values. + # Callers can ``.compute()`` first and use the eager ``write`` path. + _resolved_photo_ds, _ = _resolve_photometric(photometric, samples) + if _resolved_photo_ds == 0 and samples == 1: + raise NotImplementedError( + "photometric='miniswhite' on a dask-backed array is not " + "supported: the streaming writer would have to thread the " + "writer-side pixel inversion through every tile segment to " + "match the reader's unconditional MinIsWhite inversion " + "(issue #1836). Call ``.compute()`` first to use the eager " + "writer, or write with photometric='minisblack' / 'auto'.") + # Match the eager path's dtype promotion out_dtype = dtype if out_dtype == np.float16: diff --git a/xrspatial/geotiff/tests/test_miniswhite_backend_parity_1797.py b/xrspatial/geotiff/tests/test_miniswhite_backend_parity_1797.py index 6520ada6..0c177eab 100644 --- a/xrspatial/geotiff/tests/test_miniswhite_backend_parity_1797.py +++ b/xrspatial/geotiff/tests/test_miniswhite_backend_parity_1797.py @@ -116,5 +116,8 @@ def test_gpu_miniswhite_matches_cpu_reader(tmp_path): cpu = open_geotiff(path) gpu = open_geotiff(path, gpu=True) - np.testing.assert_array_equal(cpu.values, np.iinfo(stored.dtype).max - stored) + # After #1836 the writer pre-inverts MinIsWhite pixels so the reader's + # unconditional inversion restores the user-domain values -- the + # round-trip is the identity for both backends. + np.testing.assert_array_equal(cpu.values, stored) np.testing.assert_array_equal(gpu.data.get(), cpu.values) diff --git a/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py b/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py new file mode 100644 index 00000000..17fbdc00 --- /dev/null +++ b/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py @@ -0,0 +1,170 @@ +"""Regression tests for ``photometric='miniswhite'`` round-tripping. + +The reader unconditionally inverts single-band MinIsWhite data at +``_reader._apply_photometric_miniswhite``. Before issue #1836 the writer +set the TIFF Photometric tag to 0 without inverting pixel values, so +``to_geotiff(..., photometric='miniswhite')`` followed by ``open_geotiff`` +returned ``iinfo(dtype).max - original`` for unsigned-integer pixels and +``-original`` for floating-point pixels. Signed integers passed through +both sides unchanged, matching the reader. +""" +from __future__ import annotations + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff + + +def _da(arr: np.ndarray) -> xr.DataArray: + h, w = arr.shape + return xr.DataArray( + arr, + dims=('y', 'x'), + coords={'y': np.arange(h, dtype=np.float64), + 'x': np.arange(w, dtype=np.float64)}, + attrs={'res': (1.0, 1.0)}, + ) + + +def test_uint8_miniswhite_round_trip(tmp_path): + arr = np.array([[0, 1, 127, 254, 255]], dtype=np.uint8) + path = tmp_path / 'u8_msw_1836.tif' + to_geotiff(_da(arr), str(path), photometric='miniswhite') + r = open_geotiff(str(path)) + np.testing.assert_array_equal(np.asarray(r.values), arr) + + +def test_uint16_miniswhite_round_trip(tmp_path): + info = np.iinfo(np.uint16) + arr = np.array([[0, 1, info.max // 2, info.max - 1, info.max]], + dtype=np.uint16) + path = tmp_path / 'u16_msw_1836.tif' + to_geotiff(_da(arr), str(path), photometric='miniswhite') + r = open_geotiff(str(path)) + np.testing.assert_array_equal(np.asarray(r.values), arr) + + +def test_float32_miniswhite_round_trip(tmp_path): + arr = np.array([[-3.5, 0.0, 0.25, 7.5]], dtype=np.float32) + path = tmp_path / 'f32_msw_1836.tif' + to_geotiff(_da(arr), str(path), photometric='miniswhite') + r = open_geotiff(str(path)) + np.testing.assert_allclose(np.asarray(r.values), arr) + + +def test_miniswhite_with_nodata_round_trip(tmp_path): + arr = np.array([[10.0, np.nan, 20.0, 30.0]], dtype=np.float32) + path = tmp_path / 'f32_msw_nd_1836.tif' + to_geotiff(_da(arr), str(path), photometric='miniswhite', nodata=-9999.0) + r = open_geotiff(str(path)) + out = np.asarray(r.values) + assert np.isnan(out[0, 1]), ( + "nodata position must round-trip back to NaN after MinIsWhite " + f"inversion; got {out!r}" + ) + np.testing.assert_allclose(out[0, [0, 2, 3]], arr[0, [0, 2, 3]]) + + +def test_miniswhite_int_passthrough(tmp_path): + """The reader does not invert signed integer MinIsWhite data, so the + writer must also pass it through unchanged. Otherwise the round-trip + would silently corrupt signed data.""" + arr = np.array([[-5, -1, 0, 1, 5]], dtype=np.int16) + path = tmp_path / 'i16_msw_1836.tif' + to_geotiff(_da(arr), str(path), photometric='miniswhite') + r = open_geotiff(str(path)) + np.testing.assert_array_equal(np.asarray(r.values), arr) + + +def test_uint16_miniswhite_with_in_range_nodata_round_trip(tmp_path): + """Cover the unsigned-nodata inversion branch: an in-range integer + sentinel must round-trip back to NaN despite the writer pre-inverting + both pixels and the sentinel value. + + The on-disk sentinel byte is ``iinfo(uint16).max - 9999 = 55536``, + which collides with a real ``55536`` pixel post-inversion only when + the reader's mask compares against the right sentinel. The mid-range + pixels ``200`` and ``65000`` exercise both ends of the dtype range. + """ + arr = np.array([[10, 9999, 200, 65000]], dtype=np.uint16) + path = tmp_path / 'u16_msw_nd_1836.tif' + da_in = xr.DataArray( + arr, + dims=('y', 'x'), + coords={'y': np.arange(1, dtype=np.float64), + 'x': np.arange(4, dtype=np.float64)}, + attrs={'res': (1.0, 1.0), 'nodata': 9999}, + ) + to_geotiff(da_in, str(path), photometric='miniswhite', nodata=9999) + r = open_geotiff(str(path)) + out = np.asarray(r.values) + # The reader casts uint16 to float to express NaN at the masked + # position. The other three pixels must round-trip exactly. + assert np.isnan(out[0, 1]), ( + f"in-range uint nodata must round-trip to NaN through the " + f"writer-side sentinel inversion; got {out!r}" + ) + np.testing.assert_array_equal( + out[0, [0, 2, 3]].astype(np.uint16), arr[0, [0, 2, 3]]) + + +def test_miniswhite_rejected_with_cog_no_overviews(tmp_path): + arr = np.zeros((512, 512), dtype=np.uint8) + path = tmp_path / 'cog_msw_1836.tif' + with pytest.raises(NotImplementedError, match='miniswhite'): + to_geotiff(_da(arr), str(path), photometric='miniswhite', cog=True) + + +def test_miniswhite_rejected_with_explicit_overviews_no_cog(tmp_path): + """Isolate the ``overview_levels`` branch: ``cog=False`` so the cog + gate alone would not trigger, only the overview branch does. + """ + from xrspatial.geotiff._writer import write as _eager_write + arr = np.zeros((256, 256), dtype=np.uint8) + path = str(tmp_path / 'ov_msw_1836.tif') + with pytest.raises(NotImplementedError, match='miniswhite'): + _eager_write(arr, path, photometric='miniswhite', + cog=False, overview_levels=[2]) + + +def test_miniswhite_rejected_on_dask_path(tmp_path): + """``to_geotiff`` on a dask-backed DataArray must refuse miniswhite + because the streaming writer does not pre-invert pixels per tile. + """ + dask_array = pytest.importorskip("dask.array") + arr = dask_array.zeros((64, 64), dtype=np.uint8, chunks=32) + da_in = xr.DataArray( + arr, + dims=('y', 'x'), + coords={'y': np.arange(64, dtype=np.float64), + 'x': np.arange(64, dtype=np.float64)}, + attrs={'res': (1.0, 1.0)}, + ) + path = tmp_path / 'dask_msw_1836.tif' + with pytest.raises(NotImplementedError, match='miniswhite'): + to_geotiff(da_in, str(path), photometric='miniswhite') + + +def test_miniswhite_rejected_with_extra_tags_photometric_override(tmp_path): + """An ``extra_tags`` TAG_PHOTOMETRIC override that disagrees with the + kwarg-resolved photometric for the MinIsWhite single-band case must + be refused -- otherwise the writer transforms pixels for one tag + while the IFD advertises a different one. + """ + from xrspatial.geotiff._writer import write as _eager_write + from xrspatial.geotiff._dtypes import SHORT + from xrspatial.geotiff._header import TAG_PHOTOMETRIC + arr = np.array([[10, 20]], dtype=np.uint8) + path = str(tmp_path / 'extras_msw_1836.tif') + # photometric kwarg defaults to MinIsBlack (1), extra_tags forces + # MinIsWhite (0) on disk. Without the guard this would write pixels + # in MinIsBlack domain but tag them MinIsWhite, so the reader would + # invert and corrupt them. + with pytest.raises(ValueError, match='TAG_PHOTOMETRIC'): + _eager_write( + arr, path, + photometric='auto', + extra_tags=[(TAG_PHOTOMETRIC, SHORT, 1, 0)], + )