Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3715,6 +3715,26 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
"compression is tile-based; the strip layout is not "
"implemented on the GPU path. Use to_geotiff(..., gpu=False, "
"tiled=False) for strip output on CPU.")
# MinIsWhite pre-inversion (issue #1836) runs in the eager CPU writer.
# The GPU writer assembles tile bytes directly on device; threading
# the pixel + nodata-sentinel transform through that pipeline is out
# of scope for the round-trip fix. Refuse the combination so callers
# do not silently get inverted on-disk values. Move the array to the
# CPU and call the eager ``write`` path for MinIsWhite output.
from ._writer import _resolve_photometric as _resolve_photo_gpu
_gpu_samples_hint = (data.shape[2] if hasattr(data, 'shape')
and data.ndim == 3 else 1)
_gpu_resolved_photo, _ = _resolve_photo_gpu(
photometric, _gpu_samples_hint)
if _gpu_resolved_photo == 0 and _gpu_samples_hint == 1:
raise NotImplementedError(
"photometric='miniswhite' on the GPU writer is not "
"supported: the writer-side pixel inversion that mirrors "
"the reader's unconditional MinIsWhite inversion (issue "
"#1836) is only wired into the eager CPU ``write`` path. "
"Move the array to host memory and call to_geotiff with "
"gpu=False, or write with photometric='minisblack' / "
"'auto'.")
# write_geotiff_gpu is always tiled, so validate tile_size here and
# keep parity with the public to_geotiff entry point.
_validate_tile_size_arg(tile_size)
Expand Down
123 changes: 123 additions & 0 deletions xrspatial/geotiff/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,64 @@
}


def _invert_nodata_for_miniswhite(nodata, dtype: np.dtype):
"""Invert a nodata sentinel for MinIsWhite writes.

The reader's mask path (see ``_reader._miniswhite_inverted_nodata``)
treats the stored sentinel as living in the on-disk display domain,
so the writer pre-inverts the user-supplied sentinel alongside the
pixels. After the writer pre-inversion, the on-disk sentinel byte
matches the on-disk pixel byte that represents "missing", and the
reader inverts both back to the user domain to drive the NaN mask.
Returns ``nodata`` unchanged for signed integer pixels, NaN
sentinels, and unsigned sentinels that are out-of-range or
fractional or non-finite -- matching the reader exactly. ``+/-inf``
on a float sentinel is negated (the reader does the same). See
issue #1836.
"""
if nodata is None:
return nodata
if dtype.kind == 'u':
if not np.isfinite(nodata):
return nodata
if not float(nodata).is_integer():
return nodata
vi = int(nodata)
info = np.iinfo(dtype)
if not (info.min <= vi <= info.max):
return nodata
return info.max - vi
if dtype.kind == 'f':
if np.isnan(nodata):
return nodata
return -float(nodata)
return nodata


def _apply_photometric_miniswhite_invert(
arr: np.ndarray, resolved_photometric: int, samples_per_pixel: int,
) -> np.ndarray:
"""Mirror the reader's MinIsWhite inversion on the writer side.

The reader unconditionally inverts single-band ``photometric == 0``
data via ``_reader._apply_photometric_miniswhite``. Without a
matching writer-side inversion, ``to_geotiff(da, photometric=
'miniswhite')`` silently corrupts pixel values on the round trip.
See issue #1836.

Returns the pre-inverted array (a new array) so that the reader's
inversion restores the original values. Multi-band data and signed
integer data pass through unchanged, matching the reader.
"""
if resolved_photometric != 0 or samples_per_pixel != 1:
return arr
if arr.dtype.kind == 'u':
return np.iinfo(arr.dtype).max - arr
if arr.dtype.kind == 'f':
return -arr
return arr


def _resolve_photometric(photometric, samples_per_pixel: int):
"""Resolve the ``photometric`` writer kwarg to a TIFF photometric int
and the matching ExtraSamples list.
Expand Down Expand Up @@ -1480,6 +1538,54 @@ def write(data: np.ndarray, path: str, *,
raise ValueError(
f"JPEG compression requires 1 or 3 bands, got {samples}")

# MinIsWhite (photometric=0) requires a writer-side inversion to mirror
# the reader's unconditional inversion of single-band MinIsWhite data
# (see _reader._apply_photometric_miniswhite). Without this, written
# values do not round-trip. The nodata sentinel is inverted alongside
# the pixels so that the on-disk sentinel byte matches the on-disk
# pixel byte that means "missing" -- the reader's existing mask logic
# (issue #1809) then identifies the correct positions and rewrites
# them to NaN. Issue #1836.
_samples = data.shape[2] if data.ndim == 3 else 1
_resolved_photo, _ = _resolve_photometric(photometric, _samples)
# An ``extra_tags`` entry with TAG_PHOTOMETRIC silently overrides the
# kwarg-resolved value when the IFD is written (issue #1769). The
# pre-inversion decision below would otherwise transform pixels for
# one photometric value while the on-disk tag advertises a different
# one. Refuse the combination so callers do not end up with corrupt
# round-trips through the override path.
_extra_tags_photo = None
if extra_tags is not None:
for _et in extra_tags:
if _et[0] == TAG_PHOTOMETRIC:
_extra_tags_photo = int(_et[3])
break
if (_extra_tags_photo is not None
and _extra_tags_photo != _resolved_photo
and (_extra_tags_photo == 0 or _resolved_photo == 0)
and _samples == 1):
raise ValueError(
f"extra_tags TAG_PHOTOMETRIC override ({_extra_tags_photo}) "
f"disagrees with photometric={photometric!r} for a "
f"single-band raster where MinIsWhite (photometric=0) "
f"requires writer-side pixel inversion. The override would "
f"either pre-invert pixels for a non-MinIsWhite tag or skip "
f"inversion for a MinIsWhite tag. Pass photometric= directly "
f"instead, or drop the override.")
if _resolved_photo == 0 and _samples == 1:
Comment on lines +1549 to +1575
if cog or overview_levels is not None:
raise NotImplementedError(
"photometric='miniswhite' is not supported with "
"cog=True or explicit overview_levels: overview reducers "
"('min', 'max', 'mode', ...) do not commute with the "
"pixel inversion, so summary statistics would not match "
"the user-domain values. Write without overviews, or "
"use photometric='minisblack' / 'auto'.")
data = _apply_photometric_miniswhite_invert(
data, _resolved_photo, _samples)
if nodata is not None:
nodata = _invert_nodata_for_miniswhite(nodata, data.dtype)
Comment on lines +1584 to +1587

# Build pixel data parts
parts = []

Expand Down Expand Up @@ -1758,6 +1864,23 @@ def write_streaming(dask_data, path: str, *,
samples = dask_data.shape[2] if dask_data.ndim == 3 else 1
dtype = dask_data.dtype

# MinIsWhite pre-inversion (issue #1836) runs per-array in the eager
# ``write`` path. The streaming dask path materialises one tile-row
# at a time, so applying the inversion correctly would require
# threading the transform through every per-tile segment. That
# plumbing is out of scope for the round-trip fix; refuse the
# combination so callers do not silently get inverted on-disk values.
# Callers can ``.compute()`` first and use the eager ``write`` path.
_resolved_photo_ds, _ = _resolve_photometric(photometric, samples)
if _resolved_photo_ds == 0 and samples == 1:
raise NotImplementedError(
"photometric='miniswhite' on a dask-backed array is not "
"supported: the streaming writer would have to thread the "
"writer-side pixel inversion through every tile segment to "
"match the reader's unconditional MinIsWhite inversion "
"(issue #1836). Call ``.compute()`` first to use the eager "
"writer, or write with photometric='minisblack' / 'auto'.")

# Match the eager path's dtype promotion
out_dtype = dtype
if out_dtype == np.float16:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,5 +116,8 @@ def test_gpu_miniswhite_matches_cpu_reader(tmp_path):
cpu = open_geotiff(path)
gpu = open_geotiff(path, gpu=True)

np.testing.assert_array_equal(cpu.values, np.iinfo(stored.dtype).max - stored)
# After #1836 the writer pre-inverts MinIsWhite pixels so the reader's
# unconditional inversion restores the user-domain values -- the
# round-trip is the identity for both backends.
np.testing.assert_array_equal(cpu.values, stored)
np.testing.assert_array_equal(gpu.data.get(), cpu.values)
170 changes: 170 additions & 0 deletions xrspatial/geotiff/tests/test_miniswhite_writer_roundtrip_1836.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
"""Regression tests for ``photometric='miniswhite'`` round-tripping.

The reader unconditionally inverts single-band MinIsWhite data at
``_reader._apply_photometric_miniswhite``. Before issue #1836 the writer
set the TIFF Photometric tag to 0 without inverting pixel values, so
``to_geotiff(..., photometric='miniswhite')`` followed by ``open_geotiff``
returned ``iinfo(dtype).max - original`` for unsigned-integer pixels and
``-original`` for floating-point pixels. Signed integers passed through
both sides unchanged, matching the reader.
"""
from __future__ import annotations

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import open_geotiff, to_geotiff


def _da(arr: np.ndarray) -> xr.DataArray:
h, w = arr.shape
return xr.DataArray(
arr,
dims=('y', 'x'),
coords={'y': np.arange(h, dtype=np.float64),
'x': np.arange(w, dtype=np.float64)},
attrs={'res': (1.0, 1.0)},
)


def test_uint8_miniswhite_round_trip(tmp_path):
arr = np.array([[0, 1, 127, 254, 255]], dtype=np.uint8)
path = tmp_path / 'u8_msw_1836.tif'
to_geotiff(_da(arr), str(path), photometric='miniswhite')
r = open_geotiff(str(path))
np.testing.assert_array_equal(np.asarray(r.values), arr)


def test_uint16_miniswhite_round_trip(tmp_path):
info = np.iinfo(np.uint16)
arr = np.array([[0, 1, info.max // 2, info.max - 1, info.max]],
dtype=np.uint16)
path = tmp_path / 'u16_msw_1836.tif'
to_geotiff(_da(arr), str(path), photometric='miniswhite')
r = open_geotiff(str(path))
np.testing.assert_array_equal(np.asarray(r.values), arr)


def test_float32_miniswhite_round_trip(tmp_path):
arr = np.array([[-3.5, 0.0, 0.25, 7.5]], dtype=np.float32)
path = tmp_path / 'f32_msw_1836.tif'
to_geotiff(_da(arr), str(path), photometric='miniswhite')
r = open_geotiff(str(path))
np.testing.assert_allclose(np.asarray(r.values), arr)


def test_miniswhite_with_nodata_round_trip(tmp_path):
arr = np.array([[10.0, np.nan, 20.0, 30.0]], dtype=np.float32)
path = tmp_path / 'f32_msw_nd_1836.tif'
to_geotiff(_da(arr), str(path), photometric='miniswhite', nodata=-9999.0)
r = open_geotiff(str(path))
out = np.asarray(r.values)
assert np.isnan(out[0, 1]), (
"nodata position must round-trip back to NaN after MinIsWhite "
f"inversion; got {out!r}"
)
np.testing.assert_allclose(out[0, [0, 2, 3]], arr[0, [0, 2, 3]])


def test_miniswhite_int_passthrough(tmp_path):
"""The reader does not invert signed integer MinIsWhite data, so the
writer must also pass it through unchanged. Otherwise the round-trip
would silently corrupt signed data."""
arr = np.array([[-5, -1, 0, 1, 5]], dtype=np.int16)
path = tmp_path / 'i16_msw_1836.tif'
to_geotiff(_da(arr), str(path), photometric='miniswhite')
r = open_geotiff(str(path))
np.testing.assert_array_equal(np.asarray(r.values), arr)


def test_uint16_miniswhite_with_in_range_nodata_round_trip(tmp_path):
"""Cover the unsigned-nodata inversion branch: an in-range integer
sentinel must round-trip back to NaN despite the writer pre-inverting
both pixels and the sentinel value.

The on-disk sentinel byte is ``iinfo(uint16).max - 9999 = 55536``,
which collides with a real ``55536`` pixel post-inversion only when
the reader's mask compares against the right sentinel. The mid-range
pixels ``200`` and ``65000`` exercise both ends of the dtype range.
"""
arr = np.array([[10, 9999, 200, 65000]], dtype=np.uint16)
path = tmp_path / 'u16_msw_nd_1836.tif'
da_in = xr.DataArray(
arr,
dims=('y', 'x'),
coords={'y': np.arange(1, dtype=np.float64),
'x': np.arange(4, dtype=np.float64)},
attrs={'res': (1.0, 1.0), 'nodata': 9999},
)
to_geotiff(da_in, str(path), photometric='miniswhite', nodata=9999)
r = open_geotiff(str(path))
out = np.asarray(r.values)
# The reader casts uint16 to float to express NaN at the masked
# position. The other three pixels must round-trip exactly.
assert np.isnan(out[0, 1]), (
f"in-range uint nodata must round-trip to NaN through the "
f"writer-side sentinel inversion; got {out!r}"
)
np.testing.assert_array_equal(
out[0, [0, 2, 3]].astype(np.uint16), arr[0, [0, 2, 3]])


def test_miniswhite_rejected_with_cog_no_overviews(tmp_path):
arr = np.zeros((512, 512), dtype=np.uint8)
path = tmp_path / 'cog_msw_1836.tif'
with pytest.raises(NotImplementedError, match='miniswhite'):
to_geotiff(_da(arr), str(path), photometric='miniswhite', cog=True)


def test_miniswhite_rejected_with_explicit_overviews_no_cog(tmp_path):
"""Isolate the ``overview_levels`` branch: ``cog=False`` so the cog
gate alone would not trigger, only the overview branch does.
"""
from xrspatial.geotiff._writer import write as _eager_write
arr = np.zeros((256, 256), dtype=np.uint8)
path = str(tmp_path / 'ov_msw_1836.tif')
with pytest.raises(NotImplementedError, match='miniswhite'):
_eager_write(arr, path, photometric='miniswhite',
cog=False, overview_levels=[2])


def test_miniswhite_rejected_on_dask_path(tmp_path):
"""``to_geotiff`` on a dask-backed DataArray must refuse miniswhite
because the streaming writer does not pre-invert pixels per tile.
"""
dask_array = pytest.importorskip("dask.array")
arr = dask_array.zeros((64, 64), dtype=np.uint8, chunks=32)
da_in = xr.DataArray(
arr,
dims=('y', 'x'),
coords={'y': np.arange(64, dtype=np.float64),
'x': np.arange(64, dtype=np.float64)},
attrs={'res': (1.0, 1.0)},
)
path = tmp_path / 'dask_msw_1836.tif'
with pytest.raises(NotImplementedError, match='miniswhite'):
to_geotiff(da_in, str(path), photometric='miniswhite')


def test_miniswhite_rejected_with_extra_tags_photometric_override(tmp_path):
"""An ``extra_tags`` TAG_PHOTOMETRIC override that disagrees with the
kwarg-resolved photometric for the MinIsWhite single-band case must
be refused -- otherwise the writer transforms pixels for one tag
while the IFD advertises a different one.
"""
from xrspatial.geotiff._writer import write as _eager_write
from xrspatial.geotiff._dtypes import SHORT
from xrspatial.geotiff._header import TAG_PHOTOMETRIC
arr = np.array([[10, 20]], dtype=np.uint8)
path = str(tmp_path / 'extras_msw_1836.tif')
# photometric kwarg defaults to MinIsBlack (1), extra_tags forces
# MinIsWhite (0) on disk. Without the guard this would write pixels
# in MinIsBlack domain but tag them MinIsWhite, so the reader would
# invert and corrupt them.
with pytest.raises(ValueError, match='TAG_PHOTOMETRIC'):
_eager_write(
arr, path,
photometric='auto',
extra_tags=[(TAG_PHOTOMETRIC, SHORT, 1, 0)],
)
Loading