Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions xrspatial/geotiff/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1950,6 +1950,29 @@ def read_to_array(source, *, window=None, overview_level: int | None = None,
f"window={window} is outside the source extent "
f"({ifd.height}x{ifd.width}) or has non-positive size.")

# Validate ``band`` against the selected IFD's sample count.
# Without this, ``band=-1`` silently selects the last channel
# via numpy negative indexing and ``band>=samples_per_pixel``
# leaks a raw numpy ``IndexError`` with the internal slice
# shape. Mirrors the dask path's pre-flight validator (see
# ``read_geotiff_dask`` in ``__init__.py``) and the GPU path
# so all backends agree on the contract: 0-based non-negative
# index only. See issue #1673.
#
# NOTE: the HTTP branch (``_read_cog_http`` above) currently
# accepts ``band`` but never slices on it; issue #1669 will
# add the slice. When that lands, mirror this same check
# there so HTTP rejects the same inputs.
ifd_samples = ifd.samples_per_pixel
if band is not None:
if ifd_samples <= 1:
if band != 0:
raise IndexError(
f"band={band} requested on a single-band file.")
elif not 0 <= band < ifd_samples:
raise IndexError(
f"band={band} out of range for {ifd_samples}-band file.")

if ifd.is_tiled:
arr = _read_tiles(data, ifd, header, dtype, window,
max_pixels=max_pixels)
Expand Down
2 changes: 1 addition & 1 deletion xrspatial/geotiff/_vrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def read_vrt(vrt_path: str, *, window=None,
# indexing would silently accept negative values (``vrt.bands[-1]``
# returns the last band) and raise an opaque ``IndexError`` for
# out-of-range positive values, neither of which is the contract the
# public API documents (``band`` is a 0-based positive index). Reject
# public API documents (``band`` is a 0-based non-negative index). Reject
# both up front with a clear ``ValueError`` so callers do not get
# band-N data paired with band-0's nodata sentinel or a downstream
# IndexError from deep in the read path.
Expand Down
123 changes: 123 additions & 0 deletions xrspatial/geotiff/tests/test_band_validation_1673.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Regression tests for issue #1673.

``read_to_array`` accepts a ``band`` argument and applies it to the
decoded array via ``arr[:, :, band]`` without validating the index.
Two failure modes follow:

* ``band=-1`` silently selects the last channel via numpy negative
indexing. The public contract is "0-based non-negative index", so
this is a silent semantic shift, not an explicit selection.
* ``band=N`` with ``N >= samples_per_pixel`` raises a raw numpy
``IndexError`` whose message ("index N is out of bounds for axis
2 with size M") leaks the internal slice shape.

The dask path (``read_geotiff_dask``) and the GPU path both validate
``band`` up front and raise ``IndexError("band=N out of range for
M-band file.")``. These tests pin the local eager path to the same
contract so backend parity holds.
"""
from __future__ import annotations

import numpy as np
import pytest
import xarray as xr


@pytest.fixture
def multiband_tiff_path(tmp_path):
"""4x6 three-band tiled tiff for band-validation tests."""
from xrspatial.geotiff import to_geotiff

arr = np.arange(72, dtype=np.float32).reshape(4, 6, 3)
da = xr.DataArray(
arr,
dims=['y', 'x', 'band'],
coords={
'y': np.array([0.5, 1.5, 2.5, 3.5]),
'x': np.array([0.5, 1.5, 2.5, 3.5, 4.5, 5.5]),
'band': [0, 1, 2],
},
attrs={'crs': 4326},
)
p = tmp_path / 'mb_1673.tif'
to_geotiff(da, str(p), tile_size=4)
return str(p), arr


def test_read_to_array_negative_band_rejected(multiband_tiff_path):
"""``band=-1`` no longer silently selects the last channel."""
from xrspatial.geotiff import read_to_array

path, _ = multiband_tiff_path
with pytest.raises(IndexError, match="band=-1 out of range"):
read_to_array(path, band=-1)


def test_read_to_array_band_equal_to_samples_rejected(multiband_tiff_path):
"""``band=samples_per_pixel`` (off-by-one) raises a typed error."""
from xrspatial.geotiff import read_to_array

path, _ = multiband_tiff_path
# File has 3 bands; valid indices are 0, 1, 2.
with pytest.raises(IndexError, match="band=3 out of range"):
read_to_array(path, band=3)


def test_read_to_array_band_far_above_samples_rejected(multiband_tiff_path):
"""A wildly out-of-range band index gives the same typed error."""
from xrspatial.geotiff import read_to_array

path, _ = multiband_tiff_path
with pytest.raises(IndexError, match="band=103 out of range"):
read_to_array(path, band=103)


def test_read_to_array_valid_band_still_works(multiband_tiff_path):
"""Valid band indices keep working after the validation guard."""
from xrspatial.geotiff import read_to_array

path, arr = multiband_tiff_path
out, _ = read_to_array(path, band=1)
np.testing.assert_array_equal(out, arr[:, :, 1])


def test_read_to_array_band_none_still_returns_all_bands(multiband_tiff_path):
"""``band=None`` still returns the full multi-band array."""
from xrspatial.geotiff import read_to_array

path, arr = multiband_tiff_path
out, _ = read_to_array(path)
np.testing.assert_array_equal(out, arr)


def test_backend_parity_negative_band(multiband_tiff_path):
"""Local eager and dask paths raise the same error for ``band=-1``."""
from xrspatial.geotiff import read_geotiff_dask, read_to_array

path, _ = multiband_tiff_path

with pytest.raises(IndexError) as eager_exc:
read_to_array(path, band=-1)
with pytest.raises(IndexError) as dask_exc:
read_geotiff_dask(path, chunks=4, band=-1)

# Same error type and same diagnostic substring; the dask message
# is "band=-1 out of range for 3-band file." so any 0-based caller
# gets identical signal regardless of which backend they pick.
assert "band=-1 out of range" in str(eager_exc.value)
assert "band=-1 out of range" in str(dask_exc.value)


def test_backend_parity_band_equal_to_samples(multiband_tiff_path):
"""Local eager and dask paths agree on the off-by-one rejection."""
from xrspatial.geotiff import read_geotiff_dask, read_to_array

path, _ = multiband_tiff_path

with pytest.raises(IndexError) as eager_exc:
read_to_array(path, band=3)
with pytest.raises(IndexError) as dask_exc:
read_geotiff_dask(path, chunks=4, band=3)

assert "band=3 out of range" in str(eager_exc.value)
assert "band=3 out of range" in str(dask_exc.value)
Loading