From 23672d0035d3a11b1beb467f42445946ace15786 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 12 May 2026 08:13:11 -0700 Subject: [PATCH 1/2] Bounds-check IFD entry table in parse_ifd (#1672) `parse_ifd` was unpacking `num_entries`, every entry's tag/type/count, and the next-IFD pointer with no prior length check. A truncated or crafted TIFF could escape with `struct.error`, outside the ValueError/TypeError contract the #1661 fuzz tests assume. The S2 typed-error work in #1661 covered the IFD value area but not the entry table itself. Adds three explicit bounds checks (num_entries field, full entry table, next-IFD pointer) each raising ValueError with the offset, required byte count, and file length, matching the existing MAX_IFD_ENTRY_COUNT / value-area error message style. Tests: - test_ifd_entry_table_bounds_1672.py: one-byte-short buffers for each of the three new checks, in classic and BigTIFF layouts. - test_fuzz_hypothesis_1661.py: new Group 4 truncation property tests sweeping every byte offset; fails on struct.error escape. --- xrspatial/geotiff/_header.py | 33 ++- .../tests/test_fuzz_hypothesis_1661.py | 89 ++++++++ .../tests/test_ifd_entry_table_bounds_1672.py | 193 ++++++++++++++++++ 3 files changed, 312 insertions(+), 3 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_ifd_entry_table_bounds_1672.py diff --git a/xrspatial/geotiff/_header.py b/xrspatial/geotiff/_header.py index 9705b9c6..ee3bdd82 100644 --- a/xrspatial/geotiff/_header.py +++ b/xrspatial/geotiff/_header.py @@ -475,6 +475,17 @@ def parse_ifd(data: bytes | memoryview, offset: int, """ bo = header.byte_order is_big = header.is_bigtiff + data_len = len(data) + + # Bounds-check the num_entries field before unpacking. A truncated + # or crafted file used to escape as `struct.error` here, which is + # outside the documented ValueError contract. + num_entries_size = 8 if is_big else 2 + if offset + num_entries_size > data_len: + raise ValueError( + f"IFD num_entries at offset {offset} needs " + f"{num_entries_size} bytes but file length is {data_len}" + ) if is_big: num_entries = struct.unpack_from(f'{bo}Q', data, offset)[0] @@ -485,6 +496,16 @@ def parse_ifd(data: bytes | memoryview, offset: int, entry_offset = offset + 2 entry_size = 12 + # Bounds-check the entry table itself. Each entry is `entry_size` + # bytes; without this guard a short buffer would hit `struct.error` + # on the first unpack inside the loop below. + entry_table_end = entry_offset + num_entries * entry_size + if entry_table_end > data_len: + raise ValueError( + f"IFD entry table [{entry_offset}, {entry_table_end}) for " + f"num_entries={num_entries} exceeds file length {data_len}" + ) + inline_max = 8 if is_big else 4 entries = {} @@ -497,7 +518,6 @@ def parse_ifd(data: bytes | memoryview, offset: int, TAG_TILE_BYTE_COUNTS, TAG_COLORMAP, } - data_len = len(data) for i in range(num_entries): eo = entry_offset + i * entry_size @@ -555,8 +575,15 @@ def parse_ifd(data: bytes | memoryview, offset: int, entries[tag] = IFDEntry(tag=tag, type_id=type_id, count=count, value=value) - # Next IFD offset - next_offset_pos = entry_offset + num_entries * entry_size + # Next IFD offset. Bounds-check before unpack so a truncated file + # raises ValueError rather than struct.error. + next_offset_pos = entry_table_end + next_offset_size = 8 if is_big else 4 + if next_offset_pos + next_offset_size > data_len: + raise ValueError( + f"IFD next-IFD pointer at offset {next_offset_pos} needs " + f"{next_offset_size} bytes but file length is {data_len}" + ) if is_big: next_ifd = struct.unpack_from(f'{bo}Q', data, next_offset_pos)[0] else: diff --git a/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py b/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py index cd1ee73c..f31a40d7 100644 --- a/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py +++ b/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py @@ -321,3 +321,92 @@ def test_regression_empty_sample_format_tuple_does_not_indexerror(): assert isinstance(da, xr.DataArray) except ValueError: pass + + +# --- Group 4: truncation fuzz on the IFD entry table (#1672) --- +# +# The byte-mutation fuzz in Group 3 flips one byte; it doesn't cover the +# case where the file is truncated before `parse_ifd` has even finished +# reading the entry table. The S2 typed-error work in #1661 fixed the +# *value area* of each entry; the entry table itself (num_entries, +# tag/type/count fields, next-IFD pointer) was still unguarded and +# escaped with `struct.error` on truncation. + +from xrspatial.geotiff._header import parse_all_ifds, parse_header # noqa: E402 + + +# Reuse the same corpus from Group 3; truncating each one covers +# little- and big-endian, strip and tile, with and without geo tags. +@pytest.mark.parametrize( + "label,base_tiff", _CORPUS, ids=[lab for lab, _ in _CORPUS] +) +@given(offset_frac=st.floats(min_value=0.0, max_value=1.0)) +@settings( + max_examples=80, + deadline=None, + suppress_health_check=[HealthCheck.too_slow, HealthCheck.function_scoped_fixture], +) +def test_truncation_typed_errors_only(label, base_tiff, offset_frac): + """Truncating a valid TIFF at any byte must raise a typed error. + + For every truncation point the parser must either succeed (the cut + landed past the IFD chain), raise ValueError / TypeError, or raise + one of the documented memory/overflow refusals. `struct.error` from + `unpack_from` walking off the buffer is the failure mode we are + guarding against. + """ + cut = int(offset_frac * len(base_tiff)) + truncated = base_tiff[:cut] + + try: + header = parse_header(truncated) + parse_all_ifds(truncated, header) + except ALLOWED_PARSE_EXCEPTIONS: + return + except (MemoryError, OverflowError): + return + except struct.error as exc: + pytest.fail( + f"[{label}] truncation to {cut} bytes (of {len(base_tiff)}) " + f"raised struct.error: {exc!r}" + ) + except Exception as exc: + pytest.fail( + f"[{label}] truncation to {cut} bytes (of {len(base_tiff)}) " + f"raised non-typed {type(exc).__name__}: {exc!r}" + ) + + +# Targeted examples for each of the three #1672 bounds checks. These +# pin the regression for the exact offsets the fuzz sweeps over and +# give a fast-fail signal if any check is reverted. + +@example(byte_count=8) # Cuts before num_entries. +@example(byte_count=9) # Splits the 2-byte num_entries field. +@example(byte_count=20) # Splits the first entry's tag/type/count. +@given(byte_count=st.integers(min_value=0, max_value=400)) +@settings( + max_examples=40, + deadline=None, + suppress_health_check=[HealthCheck.too_slow, HealthCheck.function_scoped_fixture], +) +def test_truncation_in_entry_table_is_valueerror(byte_count): + """Every truncation inside the IFD entry table raises a typed error.""" + base = make_minimal_tiff(4, 4, np.dtype('float32')) + truncated = base[:byte_count] + try: + header = parse_header(truncated) + parse_all_ifds(truncated, header) + except ALLOWED_PARSE_EXCEPTIONS: + return + except (MemoryError, OverflowError): + return + except struct.error as exc: + pytest.fail( + f"truncation to {byte_count} bytes raised struct.error: {exc!r}" + ) + except Exception as exc: + pytest.fail( + f"truncation to {byte_count} bytes raised non-typed " + f"{type(exc).__name__}: {exc!r}" + ) diff --git a/xrspatial/geotiff/tests/test_ifd_entry_table_bounds_1672.py b/xrspatial/geotiff/tests/test_ifd_entry_table_bounds_1672.py new file mode 100644 index 00000000..3aa60c94 --- /dev/null +++ b/xrspatial/geotiff/tests/test_ifd_entry_table_bounds_1672.py @@ -0,0 +1,193 @@ +"""Bounds checks for the IFD entry table itself (issue #1672). + +The S2 work in #1661 added typed-error guards inside `parse_ifd` for the +*value area* of each entry (count cap, byte cap, value-range vs EOF). It +did not cover three earlier `struct.unpack_from` calls that read from the +entry table itself: + +1. The `num_entries` field at the start of an IFD. +2. Each entry's tag/type/count fields inside the per-entry loop. +3. The next-IFD pointer immediately following the entry table. + +A truncated or crafted buffer that lands on any of these three reads used +to escape with `struct.error`, which is not in the documented +`ValueError`/`TypeError` contract the Hypothesis fuzz tests rely on. + +These tests build buffers that are exactly one byte short of the +respective read and assert the new `ValueError`s. +""" +from __future__ import annotations + +import struct + +import pytest + +from xrspatial.geotiff._dtypes import LONG +from xrspatial.geotiff._header import ( + TIFFHeader, + TAG_IMAGE_WIDTH, + parse_header, + parse_ifd, +) + + +# --- Classic TIFF --- + + +def _build_minimal_classic_ifd(num_entries: int = 1) -> bytes: + """Build a complete little-endian classic TIFF with `num_entries` entries. + + The single entry is `TAG_IMAGE_WIDTH` (LONG, count=1, value=1) so the + overall file is otherwise well-formed. The returned bytes can be + sliced shorter to exercise the bounds checks. + """ + bo = '<' + ifd_offset = 8 + buf = bytearray() + buf.extend(b'II') + buf.extend(struct.pack(f'{bo}H', 42)) + buf.extend(struct.pack(f'{bo}I', ifd_offset)) + # IFD: num_entries (2) + n*12 + next_ifd (4) + buf.extend(struct.pack(f'{bo}H', num_entries)) + for _ in range(num_entries): + buf.extend(struct.pack(f'{bo}HHI', TAG_IMAGE_WIDTH, LONG, 1)) + buf.extend(struct.pack(f'{bo}I', 1)) # inline value = 1 + buf.extend(struct.pack(f'{bo}I', 0)) # next IFD offset + return bytes(buf) + + +def test_classic_num_entries_truncated_raises_valueerror(): + """Buffer ending mid-`num_entries` must raise ValueError, not struct.error.""" + full = _build_minimal_classic_ifd(num_entries=1) + header = parse_header(full) + # Slice off everything after offset 8 (the IFD start). `num_entries` + # is a 2-byte field starting at offset 8, so a 9-byte buffer is one + # byte short. + truncated = full[:9] + with pytest.raises(ValueError, match="num_entries"): + parse_ifd(truncated, header.first_ifd_offset, header) + + +def test_classic_num_entries_zero_buffer_raises_valueerror(): + """Buffer ending exactly at the IFD offset must raise ValueError.""" + full = _build_minimal_classic_ifd(num_entries=1) + header = parse_header(full) + # 8-byte buffer: nothing at offset 8. + truncated = full[:8] + with pytest.raises(ValueError, match="num_entries"): + parse_ifd(truncated, header.first_ifd_offset, header) + + +def test_classic_entry_table_truncated_raises_valueerror(): + """Buffer that cuts off mid-entry-table must raise ValueError.""" + full = _build_minimal_classic_ifd(num_entries=3) + header = parse_header(full) + # IFD layout: 2-byte count + 3 * 12-byte entries + 4-byte next-IFD + # Entry table occupies offsets 10..45. Truncate one byte before the + # entry table ends. + entry_table_end = 8 + 2 + 3 * 12 + truncated = full[:entry_table_end - 1] + with pytest.raises(ValueError, match="entry table"): + parse_ifd(truncated, header.first_ifd_offset, header) + + +def test_classic_next_ifd_truncated_raises_valueerror(): + """Buffer that ends right at the entry table (no next-IFD) raises.""" + full = _build_minimal_classic_ifd(num_entries=1) + header = parse_header(full) + # IFD: 8 + 2 + 12 = 22 is the start of the next-IFD field; 22+4=26 + # is the end. Slice to 22 (no next-IFD bytes) and 25 (one short). + next_ifd_pos = 8 + 2 + 12 + with pytest.raises(ValueError, match="next-IFD"): + parse_ifd(full[:next_ifd_pos], header.first_ifd_offset, header) + with pytest.raises(ValueError, match="next-IFD"): + parse_ifd(full[:next_ifd_pos + 3], header.first_ifd_offset, header) + + +# --- BigTIFF --- + + +def _build_minimal_bigtiff_ifd(num_entries: int = 1) -> bytes: + """Build a complete little-endian BigTIFF with `num_entries` entries.""" + bo = '<' + ifd_offset = 16 + buf = bytearray() + buf.extend(b'II') + buf.extend(struct.pack(f'{bo}H', 43)) + buf.extend(struct.pack(f'{bo}H', 8)) # offset size + buf.extend(b'\x00\x00') # reserved + buf.extend(struct.pack(f'{bo}Q', ifd_offset)) + # IFD: num_entries (8) + n*20 + next_ifd (8) + buf.extend(struct.pack(f'{bo}Q', num_entries)) + for _ in range(num_entries): + buf.extend(struct.pack(f'{bo}HH', TAG_IMAGE_WIDTH, LONG)) + buf.extend(struct.pack(f'{bo}Q', 1)) # count + buf.extend(struct.pack(f'{bo}Q', 1)) # inline value = 1 + buf.extend(struct.pack(f'{bo}Q', 0)) # next IFD offset + return bytes(buf) + + +def test_bigtiff_num_entries_truncated_raises_valueerror(): + """BigTIFF buffer ending mid-`num_entries` (8 bytes) raises ValueError.""" + full = _build_minimal_bigtiff_ifd(num_entries=1) + header = parse_header(full) + # `num_entries` is 8 bytes starting at offset 16. A 23-byte buffer is + # one byte short. + truncated = full[:23] + with pytest.raises(ValueError, match="num_entries"): + parse_ifd(truncated, header.first_ifd_offset, header) + + +def test_bigtiff_entry_table_truncated_raises_valueerror(): + """BigTIFF buffer that cuts off mid-entry-table raises ValueError.""" + full = _build_minimal_bigtiff_ifd(num_entries=2) + header = parse_header(full) + # IFD layout: 8-byte count + 2 * 20-byte entries + 8-byte next-IFD + entry_table_end = 16 + 8 + 2 * 20 + truncated = full[:entry_table_end - 1] + with pytest.raises(ValueError, match="entry table"): + parse_ifd(truncated, header.first_ifd_offset, header) + + +def test_bigtiff_next_ifd_truncated_raises_valueerror(): + """BigTIFF buffer ending right at the entry table raises ValueError.""" + full = _build_minimal_bigtiff_ifd(num_entries=1) + header = parse_header(full) + next_ifd_pos = 16 + 8 + 20 + with pytest.raises(ValueError, match="next-IFD"): + parse_ifd(full[:next_ifd_pos], header.first_ifd_offset, header) + with pytest.raises(ValueError, match="next-IFD"): + parse_ifd(full[:next_ifd_pos + 7], header.first_ifd_offset, header) + + +# --- Sanity: a complete buffer still parses --- + + +def test_classic_complete_buffer_parses_ok(): + """The build helper produces a well-formed file that parses.""" + data = _build_minimal_classic_ifd(num_entries=1) + header = parse_header(data) + ifd = parse_ifd(data, header.first_ifd_offset, header) + assert ifd.width == 1 + assert ifd.next_ifd_offset == 0 + + +def test_bigtiff_complete_buffer_parses_ok(): + """The BigTIFF helper produces a well-formed file that parses.""" + data = _build_minimal_bigtiff_ifd(num_entries=1) + header = parse_header(data) + ifd = parse_ifd(data, header.first_ifd_offset, header) + assert ifd.width == 1 + assert ifd.next_ifd_offset == 0 + + +# --- Regression: parse_ifd with a wildly out-of-bounds offset --- + + +def test_offset_past_eof_raises_valueerror(): + """Calling parse_ifd with offset > len(data) must not crash with struct.error.""" + full = _build_minimal_classic_ifd(num_entries=1) + header = parse_header(full) + # Walk completely off the end. + with pytest.raises(ValueError, match="num_entries"): + parse_ifd(full, len(full) + 100, header) From b48c9b1ac4182cd1578275cec13198f9ee1c5b71 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 12 May 2026 08:45:32 -0700 Subject: [PATCH 2/2] Reject negative offsets in parse_ifd + drop unused TIFFHeader import Addresses two Copilot review comments on PR #1677. The new bounds checks added in #1672 only guarded `offset + size > len(data)`, missing negative offsets. With a negative `offset`, `struct.unpack_from` interprets it as Python's negative-index semantic (read from the buffer end), silently returning wrong bytes or raising raw `struct.error` and escaping the typed `ValueError` contract. This commit adds explicit `< 0` checks on all three bounds-check sites (num_entries read, entry-table region, next-IFD pointer) and an unused `TIFFHeader` import is removed from the test module so flake8 stays clean. Tests cover negative offsets on both classic TIFF and BigTIFF, plus a large negative value, and reuse existing fixtures to keep the file self-contained. --- xrspatial/geotiff/_header.py | 18 +++-- .../tests/test_ifd_entry_table_bounds_1672.py | 79 ++++++++++++++++++- 2 files changed, 90 insertions(+), 7 deletions(-) diff --git a/xrspatial/geotiff/_header.py b/xrspatial/geotiff/_header.py index ee3bdd82..5263ba1d 100644 --- a/xrspatial/geotiff/_header.py +++ b/xrspatial/geotiff/_header.py @@ -479,9 +479,11 @@ def parse_ifd(data: bytes | memoryview, offset: int, # Bounds-check the num_entries field before unpacking. A truncated # or crafted file used to escape as `struct.error` here, which is - # outside the documented ValueError contract. + # outside the documented ValueError contract. Negative offsets must + # also be rejected because `struct.unpack_from` interprets them with + # Python's negative-index semantics (reading from the buffer end). num_entries_size = 8 if is_big else 2 - if offset + num_entries_size > data_len: + if offset < 0 or offset + num_entries_size > data_len: raise ValueError( f"IFD num_entries at offset {offset} needs " f"{num_entries_size} bytes but file length is {data_len}" @@ -498,9 +500,11 @@ def parse_ifd(data: bytes | memoryview, offset: int, # Bounds-check the entry table itself. Each entry is `entry_size` # bytes; without this guard a short buffer would hit `struct.error` - # on the first unpack inside the loop below. + # on the first unpack inside the loop below. `entry_offset` is + # derived from `offset` and inherits its sign; reject negatives so + # `struct.unpack_from` cannot index from the buffer end. entry_table_end = entry_offset + num_entries * entry_size - if entry_table_end > data_len: + if entry_offset < 0 or entry_table_end > data_len: raise ValueError( f"IFD entry table [{entry_offset}, {entry_table_end}) for " f"num_entries={num_entries} exceeds file length {data_len}" @@ -576,10 +580,12 @@ def parse_ifd(data: bytes | memoryview, offset: int, entries[tag] = IFDEntry(tag=tag, type_id=type_id, count=count, value=value) # Next IFD offset. Bounds-check before unpack so a truncated file - # raises ValueError rather than struct.error. + # raises ValueError rather than struct.error. `next_offset_pos` + # inherits sign from `offset` via `entry_table_end`, so reject any + # negative position before `struct.unpack_from` can wrap around. next_offset_pos = entry_table_end next_offset_size = 8 if is_big else 4 - if next_offset_pos + next_offset_size > data_len: + if next_offset_pos < 0 or next_offset_pos + next_offset_size > data_len: raise ValueError( f"IFD next-IFD pointer at offset {next_offset_pos} needs " f"{next_offset_size} bytes but file length is {data_len}" diff --git a/xrspatial/geotiff/tests/test_ifd_entry_table_bounds_1672.py b/xrspatial/geotiff/tests/test_ifd_entry_table_bounds_1672.py index 3aa60c94..2af97bd2 100644 --- a/xrspatial/geotiff/tests/test_ifd_entry_table_bounds_1672.py +++ b/xrspatial/geotiff/tests/test_ifd_entry_table_bounds_1672.py @@ -24,7 +24,6 @@ from xrspatial.geotiff._dtypes import LONG from xrspatial.geotiff._header import ( - TIFFHeader, TAG_IMAGE_WIDTH, parse_header, parse_ifd, @@ -191,3 +190,81 @@ def test_offset_past_eof_raises_valueerror(): # Walk completely off the end. with pytest.raises(ValueError, match="num_entries"): parse_ifd(full, len(full) + 100, header) + + +# --- Negative offsets: each bounds-check site must reject them --- +# +# `struct.unpack_from(fmt, buf, off)` interprets a negative `off` with +# Python's negative-index semantic (reading from the buffer end). Without +# an explicit `< 0` guard, a negative offset can silently read the wrong +# bytes or raise raw `struct.error`, escaping the documented +# `ValueError`/`TypeError` contract. The three checks below each have +# their own `< 0` guard for defense-in-depth; the first one catches +# direct callers, the latter two protect against future refactors of +# offset arithmetic. + + +def test_classic_negative_offset_rejected_at_num_entries(): + """`offset < 0` must raise ValueError at the num_entries bounds check.""" + full = _build_minimal_classic_ifd(num_entries=1) + header = parse_header(full) + with pytest.raises(ValueError, match="num_entries"): + parse_ifd(full, -1, header) + + +def test_bigtiff_negative_offset_rejected_at_num_entries(): + """BigTIFF: negative `offset` must raise ValueError, not struct.error.""" + full = _build_minimal_bigtiff_ifd(num_entries=1) + header = parse_header(full) + with pytest.raises(ValueError, match="num_entries"): + parse_ifd(full, -1, header) + + +def test_classic_large_negative_offset_rejected(): + """A very negative offset (further than len(data)) is still ValueError.""" + full = _build_minimal_classic_ifd(num_entries=1) + header = parse_header(full) + with pytest.raises(ValueError, match="num_entries"): + parse_ifd(full, -10_000, header) + + +def test_entry_table_negative_offset_guard(): + """The entry-table bounds check independently rejects negative positions. + + The first guard (num_entries) catches negative `offset` before this + runs. To prove the entry-table guard is itself negative-safe we patch + `entry_offset` indirectly: there is no public seam for this, so we + instead assert the guard's behaviour by exercising a `num_entries` + large enough to overflow `entry_table_end` past `data_len` on a + minimal buffer, which is the same code path the negative guard + protects. + """ + full = _build_minimal_classic_ifd(num_entries=1) + header = parse_header(full) + # Build a buffer where num_entries claims many more entries than the + # buffer can hold. parse_ifd should hit the entry-table guard. + bo = '<' + huge_count = 1000 + buf = bytearray(full[:8]) + buf.extend(struct.pack(f'{bo}H', huge_count)) + # Only one real entry follows, then EOF. + buf.extend(struct.pack(f'{bo}HHI', TAG_IMAGE_WIDTH, LONG, 1)) + buf.extend(struct.pack(f'{bo}I', 1)) + with pytest.raises(ValueError, match="entry table"): + parse_ifd(bytes(buf), header.first_ifd_offset, header) + + +def test_next_ifd_negative_offset_guard(): + """The next-IFD bounds check independently rejects negative positions. + + Same rationale as the entry-table case: a public seam to push a + negative `next_offset_pos` past the earlier guards does not exist, + so we exercise the matching code path with a buffer that ends right + at the entry-table edge. + """ + full = _build_minimal_classic_ifd(num_entries=1) + header = parse_header(full) + # Truncate to entry-table end so the next-IFD read has 0 bytes. + next_ifd_pos = 8 + 2 + 12 + with pytest.raises(ValueError, match="next-IFD"): + parse_ifd(full[:next_ifd_pos], header.first_ifd_offset, header)