From 401720ddae74900129ba2df9af06a8041ebfb949 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 12 May 2026 08:13:00 -0700 Subject: [PATCH 1/2] Validate band index in read_to_array for backend parity (#1673) The local eager path applied `arr[:, :, band]` with no pre-flight check, so `band=-1` silently selected the last channel via numpy negative indexing and `band>=samples_per_pixel` leaked a raw numpy IndexError with the internal slice shape. Add the same validator the dask and GPU paths already run, raising `IndexError("band=N out of range for M-band file.")` so all three backends agree on the contract: 0-based positive index only. The HTTP branch accepts `band` but does not slice on it yet; issue #1669 will add the slice. A note in the new validator flags that the same check should land there at that point. --- xrspatial/geotiff/_reader.py | 23 ++++ .../tests/test_band_validation_1673.py | 123 ++++++++++++++++++ 2 files changed, 146 insertions(+) create mode 100644 xrspatial/geotiff/tests/test_band_validation_1673.py diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index 3dc79409..cdcf87bf 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -1950,6 +1950,29 @@ def read_to_array(source, *, window=None, overview_level: int | None = None, f"window={window} is outside the source extent " f"({ifd.height}x{ifd.width}) or has non-positive size.") + # Validate ``band`` against the selected IFD's sample count. + # Without this, ``band=-1`` silently selects the last channel + # via numpy negative indexing and ``band>=samples_per_pixel`` + # leaks a raw numpy ``IndexError`` with the internal slice + # shape. Mirrors the dask path's pre-flight validator (see + # ``read_geotiff_dask`` in ``__init__.py``) and the GPU path + # so all backends agree on the contract: 0-based positive + # index only. See issue #1673. + # + # NOTE: the HTTP branch (``_read_cog_http`` above) currently + # accepts ``band`` but never slices on it; issue #1669 will + # add the slice. When that lands, mirror this same check + # there so HTTP rejects the same inputs. + ifd_samples = ifd.samples_per_pixel + if band is not None: + if ifd_samples <= 1: + if band != 0: + raise IndexError( + f"band={band} requested on a single-band file.") + elif not 0 <= band < ifd_samples: + raise IndexError( + f"band={band} out of range for {ifd_samples}-band file.") + if ifd.is_tiled: arr = _read_tiles(data, ifd, header, dtype, window, max_pixels=max_pixels) diff --git a/xrspatial/geotiff/tests/test_band_validation_1673.py b/xrspatial/geotiff/tests/test_band_validation_1673.py new file mode 100644 index 00000000..0421b1f2 --- /dev/null +++ b/xrspatial/geotiff/tests/test_band_validation_1673.py @@ -0,0 +1,123 @@ +"""Regression tests for issue #1673. + +``read_to_array`` accepts a ``band`` argument and applies it to the +decoded array via ``arr[:, :, band]`` without validating the index. +Two failure modes follow: + +* ``band=-1`` silently selects the last channel via numpy negative + indexing. The public contract is "0-based positive index", so + this is a silent semantic shift, not an explicit selection. +* ``band=N`` with ``N >= samples_per_pixel`` raises a raw numpy + ``IndexError`` whose message ("index N is out of bounds for axis + 2 with size M") leaks the internal slice shape. + +The dask path (``read_geotiff_dask``) and the GPU path both validate +``band`` up front and raise ``IndexError("band=N out of range for +M-band file.")``. These tests pin the local eager path to the same +contract so backend parity holds. +""" +from __future__ import annotations + +import numpy as np +import pytest +import xarray as xr + + +@pytest.fixture +def multiband_tiff_path(tmp_path): + """4x6 three-band tiled tiff for band-validation tests.""" + from xrspatial.geotiff import to_geotiff + + arr = np.arange(72, dtype=np.float32).reshape(4, 6, 3) + da = xr.DataArray( + arr, + dims=['y', 'x', 'band'], + coords={ + 'y': np.array([0.5, 1.5, 2.5, 3.5]), + 'x': np.array([0.5, 1.5, 2.5, 3.5, 4.5, 5.5]), + 'band': [0, 1, 2], + }, + attrs={'crs': 4326}, + ) + p = tmp_path / 'mb_1673.tif' + to_geotiff(da, str(p), tile_size=4) + return str(p), arr + + +def test_read_to_array_negative_band_rejected(multiband_tiff_path): + """``band=-1`` no longer silently selects the last channel.""" + from xrspatial.geotiff import read_to_array + + path, _ = multiband_tiff_path + with pytest.raises(IndexError, match="band=-1 out of range"): + read_to_array(path, band=-1) + + +def test_read_to_array_band_equal_to_samples_rejected(multiband_tiff_path): + """``band=samples_per_pixel`` (off-by-one) raises a typed error.""" + from xrspatial.geotiff import read_to_array + + path, _ = multiband_tiff_path + # File has 3 bands; valid indices are 0, 1, 2. + with pytest.raises(IndexError, match="band=3 out of range"): + read_to_array(path, band=3) + + +def test_read_to_array_band_far_above_samples_rejected(multiband_tiff_path): + """A wildly out-of-range band index gives the same typed error.""" + from xrspatial.geotiff import read_to_array + + path, _ = multiband_tiff_path + with pytest.raises(IndexError, match="band=103 out of range"): + read_to_array(path, band=103) + + +def test_read_to_array_valid_band_still_works(multiband_tiff_path): + """Valid band indices keep working after the validation guard.""" + from xrspatial.geotiff import read_to_array + + path, arr = multiband_tiff_path + out, _ = read_to_array(path, band=1) + np.testing.assert_array_equal(out, arr[:, :, 1]) + + +def test_read_to_array_band_none_still_returns_all_bands(multiband_tiff_path): + """``band=None`` still returns the full multi-band array.""" + from xrspatial.geotiff import read_to_array + + path, arr = multiband_tiff_path + out, _ = read_to_array(path) + np.testing.assert_array_equal(out, arr) + + +def test_backend_parity_negative_band(multiband_tiff_path): + """Local eager and dask paths raise the same error for ``band=-1``.""" + from xrspatial.geotiff import read_geotiff_dask, read_to_array + + path, _ = multiband_tiff_path + + with pytest.raises(IndexError) as eager_exc: + read_to_array(path, band=-1) + with pytest.raises(IndexError) as dask_exc: + read_geotiff_dask(path, chunks=4, band=-1) + + # Same error type and same diagnostic substring; the dask message + # is "band=-1 out of range for 3-band file." so any 0-based caller + # gets identical signal regardless of which backend they pick. + assert "band=-1 out of range" in str(eager_exc.value) + assert "band=-1 out of range" in str(dask_exc.value) + + +def test_backend_parity_band_equal_to_samples(multiband_tiff_path): + """Local eager and dask paths agree on the off-by-one rejection.""" + from xrspatial.geotiff import read_geotiff_dask, read_to_array + + path, _ = multiband_tiff_path + + with pytest.raises(IndexError) as eager_exc: + read_to_array(path, band=3) + with pytest.raises(IndexError) as dask_exc: + read_geotiff_dask(path, chunks=4, band=3) + + assert "band=3 out of range" in str(eager_exc.value) + assert "band=3 out of range" in str(dask_exc.value) From 8aa1a73ee30d1c09137d0592d5665accd48912f6 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 12 May 2026 08:43:35 -0700 Subject: [PATCH 2/2] docs(geotiff): reword band contract as "0-based non-negative index" Addresses Copilot review feedback on PR #1676: "positive" excludes 0 in standard math usage, but band=0 is valid. Switch to "non-negative" in the three places the contract is documented (_reader.py comment, _vrt.py comment, test_band_validation_1673.py module docstring). --- xrspatial/geotiff/_reader.py | 2 +- xrspatial/geotiff/_vrt.py | 2 +- xrspatial/geotiff/tests/test_band_validation_1673.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index cdcf87bf..a08d107d 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -1956,7 +1956,7 @@ def read_to_array(source, *, window=None, overview_level: int | None = None, # leaks a raw numpy ``IndexError`` with the internal slice # shape. Mirrors the dask path's pre-flight validator (see # ``read_geotiff_dask`` in ``__init__.py``) and the GPU path - # so all backends agree on the contract: 0-based positive + # so all backends agree on the contract: 0-based non-negative # index only. See issue #1673. # # NOTE: the HTTP branch (``_read_cog_http`` above) currently diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 031246b5..5fd3e86a 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -269,7 +269,7 @@ def read_vrt(vrt_path: str, *, window=None, # indexing would silently accept negative values (``vrt.bands[-1]`` # returns the last band) and raise an opaque ``IndexError`` for # out-of-range positive values, neither of which is the contract the - # public API documents (``band`` is a 0-based positive index). Reject + # public API documents (``band`` is a 0-based non-negative index). Reject # both up front with a clear ``ValueError`` so callers do not get # band-N data paired with band-0's nodata sentinel or a downstream # IndexError from deep in the read path. diff --git a/xrspatial/geotiff/tests/test_band_validation_1673.py b/xrspatial/geotiff/tests/test_band_validation_1673.py index 0421b1f2..5d4c57f7 100644 --- a/xrspatial/geotiff/tests/test_band_validation_1673.py +++ b/xrspatial/geotiff/tests/test_band_validation_1673.py @@ -5,7 +5,7 @@ Two failure modes follow: * ``band=-1`` silently selects the last channel via numpy negative - indexing. The public contract is "0-based positive index", so + indexing. The public contract is "0-based non-negative index", so this is a silent semantic shift, not an explicit selection. * ``band=N`` with ``N >= samples_per_pixel`` raises a raw numpy ``IndexError`` whose message ("index N is out of bounds for axis