From b7a65a51b826be1d73d7f0ab5b41ce43d7e2877b Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 21:19:19 -0700 Subject: [PATCH 1/2] geotiff: writer pre-inverts MinIsWhite pixels so values round-trip (#1836) photometric='miniswhite' set the TIFF Photometric tag to 0 without inverting pixel values. The reader unconditionally inverts single-band MinIsWhite data (see _reader._apply_photometric_miniswhite), so to_geotiff(..., photometric='miniswhite') followed by open_geotiff returned iinfo(dtype).max - original (uint) or -original (float) instead of the user's values. Mirror the reader's inversion on the writer side so the round trip is the identity. Invert the nodata sentinel alongside the pixels so the reader's existing mask logic (issue #1809) identifies the correct nodata positions and rewrites them to NaN. Signed integer data passes through unchanged on both sides. Reject photometric='miniswhite' with cog=True or explicit overview_levels: the overview reducers ('min', 'max', 'mode', ...) do not commute with the pixel inversion, so summary statistics would not match the user-domain values. Mean would still work in principle, but the codepath does not know which reducer the user picked at the point where the inversion would have to be undone, so refuse the combination outright with a clear error. Update test_miniswhite_backend_parity_1797 to assert the new (correct) round-trip identity. The previous expectation pinned the buggy sign-flip; the HTTP cases keep their existing expectations since tifffile-written files do not pre-invert. --- xrspatial/geotiff/_writer.py | 80 ++++++++++++++++ .../test_miniswhite_backend_parity_1797.py | 5 +- .../test_miniswhite_writer_roundtrip_1836.py | 91 +++++++++++++++++++ 3 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index 4a0c2626..7b07a88d 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -100,6 +100,62 @@ } +def _invert_nodata_for_miniswhite(nodata, dtype: np.dtype): + """Invert a nodata sentinel for MinIsWhite writes. + + The reader's mask path (see ``_reader._miniswhite_inverted_nodata``) + treats the stored sentinel as living in the on-disk display domain, + so the writer pre-inverts the user-supplied sentinel alongside the + pixels. After the writer pre-inversion, the on-disk sentinel byte + matches the on-disk pixel byte that represents "missing", and the + reader inverts both back to the user domain to drive the NaN mask. + Returns ``nodata`` unchanged for signed integer / out-of-range / + non-finite / fractional sentinels, mirroring the reader exactly. + See issue #1836. + """ + if nodata is None: + return nodata + if dtype.kind == 'u': + if not np.isfinite(nodata): + return nodata + if not float(nodata).is_integer(): + return nodata + vi = int(nodata) + info = np.iinfo(dtype) + if not (info.min <= vi <= info.max): + return nodata + return info.max - vi + if dtype.kind == 'f': + if np.isnan(nodata): + return nodata + return -float(nodata) + return nodata + + +def _apply_photometric_miniswhite_invert( + arr: np.ndarray, resolved_photometric: int, samples_per_pixel: int, +) -> np.ndarray: + """Mirror the reader's MinIsWhite inversion on the writer side. + + The reader unconditionally inverts single-band ``photometric == 0`` + data via ``_reader._apply_photometric_miniswhite``. Without a + matching writer-side inversion, ``to_geotiff(da, photometric= + 'miniswhite')`` silently corrupts pixel values on the round trip. + See issue #1836. + + Returns the pre-inverted array (a new array) so that the reader's + inversion restores the original values. Multi-band data and signed + integer data pass through unchanged, matching the reader. + """ + if resolved_photometric != 0 or samples_per_pixel != 1: + return arr + if arr.dtype.kind == 'u': + return np.iinfo(arr.dtype).max - arr + if arr.dtype.kind == 'f': + return -arr + return arr + + def _resolve_photometric(photometric, samples_per_pixel: int): """Resolve the ``photometric`` writer kwarg to a TIFF photometric int and the matching ExtraSamples list. @@ -1480,6 +1536,30 @@ def write(data: np.ndarray, path: str, *, raise ValueError( f"JPEG compression requires 1 or 3 bands, got {samples}") + # MinIsWhite (photometric=0) requires a writer-side inversion to mirror + # the reader's unconditional inversion of single-band MinIsWhite data + # (see _reader._apply_photometric_miniswhite). Without this, written + # values do not round-trip. The nodata sentinel is inverted alongside + # the pixels so that the on-disk sentinel byte matches the on-disk + # pixel byte that means "missing" -- the reader's existing mask logic + # (issue #1809) then identifies the correct positions and rewrites + # them to NaN. Issue #1836. + _samples = data.shape[2] if data.ndim == 3 else 1 + _resolved_photo, _ = _resolve_photometric(photometric, _samples) + if _resolved_photo == 0 and _samples == 1: + if cog or overview_levels is not None: + raise NotImplementedError( + "photometric='miniswhite' is not supported with " + "cog=True or explicit overview_levels: overview reducers " + "('min', 'max', 'mode', ...) do not commute with the " + "pixel inversion, so summary statistics would not match " + "the user-domain values. Write without overviews, or " + "use photometric='minisblack' / 'auto'.") + data = _apply_photometric_miniswhite_invert( + data, _resolved_photo, _samples) + if nodata is not None: + nodata = _invert_nodata_for_miniswhite(nodata, data.dtype) + # Build pixel data parts parts = [] diff --git a/xrspatial/geotiff/tests/test_miniswhite_backend_parity_1797.py b/xrspatial/geotiff/tests/test_miniswhite_backend_parity_1797.py index 6520ada6..0c177eab 100644 --- a/xrspatial/geotiff/tests/test_miniswhite_backend_parity_1797.py +++ b/xrspatial/geotiff/tests/test_miniswhite_backend_parity_1797.py @@ -116,5 +116,8 @@ def test_gpu_miniswhite_matches_cpu_reader(tmp_path): cpu = open_geotiff(path) gpu = open_geotiff(path, gpu=True) - np.testing.assert_array_equal(cpu.values, np.iinfo(stored.dtype).max - stored) + # After #1836 the writer pre-inverts MinIsWhite pixels so the reader's + # unconditional inversion restores the user-domain values -- the + # round-trip is the identity for both backends. + np.testing.assert_array_equal(cpu.values, stored) np.testing.assert_array_equal(gpu.data.get(), cpu.values) diff --git a/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py b/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py new file mode 100644 index 00000000..eb039fa6 --- /dev/null +++ b/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py @@ -0,0 +1,91 @@ +"""Regression tests for ``photometric='miniswhite'`` round-tripping. + +The reader unconditionally inverts single-band MinIsWhite data at +``_reader._apply_photometric_miniswhite``. Before issue #1836 the writer +set the TIFF Photometric tag to 0 without inverting pixel values, so +``to_geotiff(..., photometric='miniswhite')`` followed by ``open_geotiff`` +returned ``iinfo(dtype).max - original`` instead of the user's values. +""" +from __future__ import annotations + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff + + +def _da(arr: np.ndarray) -> xr.DataArray: + h, w = arr.shape + return xr.DataArray( + arr, + dims=('y', 'x'), + coords={'y': np.arange(h, dtype=np.float64), + 'x': np.arange(w, dtype=np.float64)}, + attrs={'res': (1.0, 1.0)}, + ) + + +def test_uint8_miniswhite_round_trip(tmp_path): + arr = np.array([[0, 1, 127, 254, 255]], dtype=np.uint8) + path = tmp_path / 'u8_msw_1836.tif' + to_geotiff(_da(arr), str(path), photometric='miniswhite') + r = open_geotiff(str(path)) + np.testing.assert_array_equal(np.asarray(r.values), arr) + + +def test_uint16_miniswhite_round_trip(tmp_path): + info = np.iinfo(np.uint16) + arr = np.array([[0, 1, info.max // 2, info.max - 1, info.max]], + dtype=np.uint16) + path = tmp_path / 'u16_msw_1836.tif' + to_geotiff(_da(arr), str(path), photometric='miniswhite') + r = open_geotiff(str(path)) + np.testing.assert_array_equal(np.asarray(r.values), arr) + + +def test_float32_miniswhite_round_trip(tmp_path): + arr = np.array([[-3.5, 0.0, 0.25, 7.5]], dtype=np.float32) + path = tmp_path / 'f32_msw_1836.tif' + to_geotiff(_da(arr), str(path), photometric='miniswhite') + r = open_geotiff(str(path)) + np.testing.assert_allclose(np.asarray(r.values), arr) + + +def test_miniswhite_with_nodata_round_trip(tmp_path): + arr = np.array([[10.0, np.nan, 20.0, 30.0]], dtype=np.float32) + path = tmp_path / 'f32_msw_nd_1836.tif' + to_geotiff(_da(arr), str(path), photometric='miniswhite', nodata=-9999.0) + r = open_geotiff(str(path)) + out = np.asarray(r.values) + assert np.isnan(out[0, 1]), ( + "nodata position must round-trip back to NaN after MinIsWhite " + f"inversion; got {out!r}" + ) + np.testing.assert_allclose(out[0, [0, 2, 3]], arr[0, [0, 2, 3]]) + + +def test_miniswhite_int_passthrough(tmp_path): + """The reader does not invert signed integer MinIsWhite data, so the + writer must also pass it through unchanged. Otherwise the round-trip + would silently corrupt signed data.""" + arr = np.array([[-5, -1, 0, 1, 5]], dtype=np.int16) + path = tmp_path / 'i16_msw_1836.tif' + to_geotiff(_da(arr), str(path), photometric='miniswhite') + r = open_geotiff(str(path)) + np.testing.assert_array_equal(np.asarray(r.values), arr) + + +def test_miniswhite_rejected_with_cog(tmp_path): + arr = np.zeros((512, 512), dtype=np.uint8) + path = tmp_path / 'cog_msw_1836.tif' + with pytest.raises(NotImplementedError, match='miniswhite'): + to_geotiff(_da(arr), str(path), photometric='miniswhite', cog=True) + + +def test_miniswhite_rejected_with_explicit_overviews(tmp_path): + arr = np.zeros((256, 256), dtype=np.uint8) + path = tmp_path / 'ov_msw_1836.tif' + with pytest.raises(NotImplementedError, match='miniswhite'): + to_geotiff(_da(arr), str(path), photometric='miniswhite', + cog=True, overview_levels=[2, 4]) From fb9f8d29724ce110084ade9cdcf1ff0cb5657a23 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 14 May 2026 08:42:19 -0700 Subject: [PATCH 2/2] geotiff: address Copilot review on PR #1837 (#1836) Six review items, all valid: - Refuse ``photometric='miniswhite'`` in ``write_streaming`` (dask path) and ``write_geotiff_gpu`` (GPU path). The pre-inversion was only wired into the eager CPU ``write``; the streaming and GPU paths silently bypassed it. Direct callers to ``.compute()`` and the eager writer. - Refuse an ``extra_tags`` TAG_PHOTOMETRIC override that disagrees with the kwarg-resolved photometric for the MinIsWhite single-band case, since the override would otherwise leave pixels and the on-disk tag in different photometric domains. - Split the overview-rejection test so the ``overview_levels`` branch is covered with ``cog=False`` rather than overlapping the ``cog`` branch. - Add a uint MinIsWhite round-trip test with an in-range integer nodata sentinel so the unsigned-sentinel inversion branch is exercised by tests, not just by the float path. - Correct the module docstring to describe both the unsigned (``iinfo.max - original``) and the float (``-original``) pre-fix failure modes. - Correct ``_invert_nodata_for_miniswhite`` docstring: ``+/-inf`` on a float sentinel is negated (matching the reader); only NaN passes through unchanged. --- xrspatial/geotiff/__init__.py | 20 ++++ xrspatial/geotiff/_writer.py | 49 +++++++++- .../test_miniswhite_writer_roundtrip_1836.py | 91 +++++++++++++++++-- 3 files changed, 151 insertions(+), 9 deletions(-) diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index e1c29f22..9a33d276 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -3715,6 +3715,26 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, "compression is tile-based; the strip layout is not " "implemented on the GPU path. Use to_geotiff(..., gpu=False, " "tiled=False) for strip output on CPU.") + # MinIsWhite pre-inversion (issue #1836) runs in the eager CPU writer. + # The GPU writer assembles tile bytes directly on device; threading + # the pixel + nodata-sentinel transform through that pipeline is out + # of scope for the round-trip fix. Refuse the combination so callers + # do not silently get inverted on-disk values. Move the array to the + # CPU and call the eager ``write`` path for MinIsWhite output. + from ._writer import _resolve_photometric as _resolve_photo_gpu + _gpu_samples_hint = (data.shape[2] if hasattr(data, 'shape') + and data.ndim == 3 else 1) + _gpu_resolved_photo, _ = _resolve_photo_gpu( + photometric, _gpu_samples_hint) + if _gpu_resolved_photo == 0 and _gpu_samples_hint == 1: + raise NotImplementedError( + "photometric='miniswhite' on the GPU writer is not " + "supported: the writer-side pixel inversion that mirrors " + "the reader's unconditional MinIsWhite inversion (issue " + "#1836) is only wired into the eager CPU ``write`` path. " + "Move the array to host memory and call to_geotiff with " + "gpu=False, or write with photometric='minisblack' / " + "'auto'.") # write_geotiff_gpu is always tiled, so validate tile_size here and # keep parity with the public to_geotiff entry point. _validate_tile_size_arg(tile_size) diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index 7b07a88d..2216d612 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -109,9 +109,11 @@ def _invert_nodata_for_miniswhite(nodata, dtype: np.dtype): pixels. After the writer pre-inversion, the on-disk sentinel byte matches the on-disk pixel byte that represents "missing", and the reader inverts both back to the user domain to drive the NaN mask. - Returns ``nodata`` unchanged for signed integer / out-of-range / - non-finite / fractional sentinels, mirroring the reader exactly. - See issue #1836. + Returns ``nodata`` unchanged for signed integer pixels, NaN + sentinels, and unsigned sentinels that are out-of-range or + fractional or non-finite -- matching the reader exactly. ``+/-inf`` + on a float sentinel is negated (the reader does the same). See + issue #1836. """ if nodata is None: return nodata @@ -1546,6 +1548,30 @@ def write(data: np.ndarray, path: str, *, # them to NaN. Issue #1836. _samples = data.shape[2] if data.ndim == 3 else 1 _resolved_photo, _ = _resolve_photometric(photometric, _samples) + # An ``extra_tags`` entry with TAG_PHOTOMETRIC silently overrides the + # kwarg-resolved value when the IFD is written (issue #1769). The + # pre-inversion decision below would otherwise transform pixels for + # one photometric value while the on-disk tag advertises a different + # one. Refuse the combination so callers do not end up with corrupt + # round-trips through the override path. + _extra_tags_photo = None + if extra_tags is not None: + for _et in extra_tags: + if _et[0] == TAG_PHOTOMETRIC: + _extra_tags_photo = int(_et[3]) + break + if (_extra_tags_photo is not None + and _extra_tags_photo != _resolved_photo + and (_extra_tags_photo == 0 or _resolved_photo == 0) + and _samples == 1): + raise ValueError( + f"extra_tags TAG_PHOTOMETRIC override ({_extra_tags_photo}) " + f"disagrees with photometric={photometric!r} for a " + f"single-band raster where MinIsWhite (photometric=0) " + f"requires writer-side pixel inversion. The override would " + f"either pre-invert pixels for a non-MinIsWhite tag or skip " + f"inversion for a MinIsWhite tag. Pass photometric= directly " + f"instead, or drop the override.") if _resolved_photo == 0 and _samples == 1: if cog or overview_levels is not None: raise NotImplementedError( @@ -1838,6 +1864,23 @@ def write_streaming(dask_data, path: str, *, samples = dask_data.shape[2] if dask_data.ndim == 3 else 1 dtype = dask_data.dtype + # MinIsWhite pre-inversion (issue #1836) runs per-array in the eager + # ``write`` path. The streaming dask path materialises one tile-row + # at a time, so applying the inversion correctly would require + # threading the transform through every per-tile segment. That + # plumbing is out of scope for the round-trip fix; refuse the + # combination so callers do not silently get inverted on-disk values. + # Callers can ``.compute()`` first and use the eager ``write`` path. + _resolved_photo_ds, _ = _resolve_photometric(photometric, samples) + if _resolved_photo_ds == 0 and samples == 1: + raise NotImplementedError( + "photometric='miniswhite' on a dask-backed array is not " + "supported: the streaming writer would have to thread the " + "writer-side pixel inversion through every tile segment to " + "match the reader's unconditional MinIsWhite inversion " + "(issue #1836). Call ``.compute()`` first to use the eager " + "writer, or write with photometric='minisblack' / 'auto'.") + # Match the eager path's dtype promotion out_dtype = dtype if out_dtype == np.float16: diff --git a/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py b/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py index eb039fa6..17fbdc00 100644 --- a/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py +++ b/xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py @@ -4,7 +4,9 @@ ``_reader._apply_photometric_miniswhite``. Before issue #1836 the writer set the TIFF Photometric tag to 0 without inverting pixel values, so ``to_geotiff(..., photometric='miniswhite')`` followed by ``open_geotiff`` -returned ``iinfo(dtype).max - original`` instead of the user's values. +returned ``iinfo(dtype).max - original`` for unsigned-integer pixels and +``-original`` for floating-point pixels. Signed integers passed through +both sides unchanged, matching the reader. """ from __future__ import annotations @@ -76,16 +78,93 @@ def test_miniswhite_int_passthrough(tmp_path): np.testing.assert_array_equal(np.asarray(r.values), arr) -def test_miniswhite_rejected_with_cog(tmp_path): +def test_uint16_miniswhite_with_in_range_nodata_round_trip(tmp_path): + """Cover the unsigned-nodata inversion branch: an in-range integer + sentinel must round-trip back to NaN despite the writer pre-inverting + both pixels and the sentinel value. + + The on-disk sentinel byte is ``iinfo(uint16).max - 9999 = 55536``, + which collides with a real ``55536`` pixel post-inversion only when + the reader's mask compares against the right sentinel. The mid-range + pixels ``200`` and ``65000`` exercise both ends of the dtype range. + """ + arr = np.array([[10, 9999, 200, 65000]], dtype=np.uint16) + path = tmp_path / 'u16_msw_nd_1836.tif' + da_in = xr.DataArray( + arr, + dims=('y', 'x'), + coords={'y': np.arange(1, dtype=np.float64), + 'x': np.arange(4, dtype=np.float64)}, + attrs={'res': (1.0, 1.0), 'nodata': 9999}, + ) + to_geotiff(da_in, str(path), photometric='miniswhite', nodata=9999) + r = open_geotiff(str(path)) + out = np.asarray(r.values) + # The reader casts uint16 to float to express NaN at the masked + # position. The other three pixels must round-trip exactly. + assert np.isnan(out[0, 1]), ( + f"in-range uint nodata must round-trip to NaN through the " + f"writer-side sentinel inversion; got {out!r}" + ) + np.testing.assert_array_equal( + out[0, [0, 2, 3]].astype(np.uint16), arr[0, [0, 2, 3]]) + + +def test_miniswhite_rejected_with_cog_no_overviews(tmp_path): arr = np.zeros((512, 512), dtype=np.uint8) path = tmp_path / 'cog_msw_1836.tif' with pytest.raises(NotImplementedError, match='miniswhite'): to_geotiff(_da(arr), str(path), photometric='miniswhite', cog=True) -def test_miniswhite_rejected_with_explicit_overviews(tmp_path): +def test_miniswhite_rejected_with_explicit_overviews_no_cog(tmp_path): + """Isolate the ``overview_levels`` branch: ``cog=False`` so the cog + gate alone would not trigger, only the overview branch does. + """ + from xrspatial.geotiff._writer import write as _eager_write arr = np.zeros((256, 256), dtype=np.uint8) - path = tmp_path / 'ov_msw_1836.tif' + path = str(tmp_path / 'ov_msw_1836.tif') + with pytest.raises(NotImplementedError, match='miniswhite'): + _eager_write(arr, path, photometric='miniswhite', + cog=False, overview_levels=[2]) + + +def test_miniswhite_rejected_on_dask_path(tmp_path): + """``to_geotiff`` on a dask-backed DataArray must refuse miniswhite + because the streaming writer does not pre-invert pixels per tile. + """ + dask_array = pytest.importorskip("dask.array") + arr = dask_array.zeros((64, 64), dtype=np.uint8, chunks=32) + da_in = xr.DataArray( + arr, + dims=('y', 'x'), + coords={'y': np.arange(64, dtype=np.float64), + 'x': np.arange(64, dtype=np.float64)}, + attrs={'res': (1.0, 1.0)}, + ) + path = tmp_path / 'dask_msw_1836.tif' with pytest.raises(NotImplementedError, match='miniswhite'): - to_geotiff(_da(arr), str(path), photometric='miniswhite', - cog=True, overview_levels=[2, 4]) + to_geotiff(da_in, str(path), photometric='miniswhite') + + +def test_miniswhite_rejected_with_extra_tags_photometric_override(tmp_path): + """An ``extra_tags`` TAG_PHOTOMETRIC override that disagrees with the + kwarg-resolved photometric for the MinIsWhite single-band case must + be refused -- otherwise the writer transforms pixels for one tag + while the IFD advertises a different one. + """ + from xrspatial.geotiff._writer import write as _eager_write + from xrspatial.geotiff._dtypes import SHORT + from xrspatial.geotiff._header import TAG_PHOTOMETRIC + arr = np.array([[10, 20]], dtype=np.uint8) + path = str(tmp_path / 'extras_msw_1836.tif') + # photometric kwarg defaults to MinIsBlack (1), extra_tags forces + # MinIsWhite (0) on disk. Without the guard this would write pixels + # in MinIsBlack domain but tag them MinIsWhite, so the reader would + # invert and corrupt them. + with pytest.raises(ValueError, match='TAG_PHOTOMETRIC'): + _eager_write( + arr, path, + photometric='auto', + extra_tags=[(TAG_PHOTOMETRIC, SHORT, 1, 0)], + )