Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions xrspatial/geotiff/_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,19 @@ def parse_ifd(data: bytes | memoryview, offset: int,
"""
bo = header.byte_order
is_big = header.is_bigtiff
data_len = len(data)

# Bounds-check the num_entries field before unpacking. A truncated
# or crafted file used to escape as `struct.error` here, which is
# outside the documented ValueError contract. Negative offsets must
# also be rejected because `struct.unpack_from` interprets them with
# Python's negative-index semantics (reading from the buffer end).
num_entries_size = 8 if is_big else 2
if offset < 0 or offset + num_entries_size > data_len:
raise ValueError(
f"IFD num_entries at offset {offset} needs "
f"{num_entries_size} bytes but file length is {data_len}"
)
Comment on lines +485 to +490

if is_big:
num_entries = struct.unpack_from(f'{bo}Q', data, offset)[0]
Expand All @@ -485,6 +498,18 @@ def parse_ifd(data: bytes | memoryview, offset: int,
entry_offset = offset + 2
entry_size = 12

# Bounds-check the entry table itself. Each entry is `entry_size`
# bytes; without this guard a short buffer would hit `struct.error`
# on the first unpack inside the loop below. `entry_offset` is
# derived from `offset` and inherits its sign; reject negatives so
# `struct.unpack_from` cannot index from the buffer end.
entry_table_end = entry_offset + num_entries * entry_size
if entry_offset < 0 or entry_table_end > data_len:
raise ValueError(
f"IFD entry table [{entry_offset}, {entry_table_end}) for "
f"num_entries={num_entries} exceeds file length {data_len}"
)

inline_max = 8 if is_big else 4
entries = {}

Expand All @@ -497,7 +522,6 @@ def parse_ifd(data: bytes | memoryview, offset: int,
TAG_TILE_BYTE_COUNTS,
TAG_COLORMAP,
}
data_len = len(data)

for i in range(num_entries):
eo = entry_offset + i * entry_size
Expand Down Expand Up @@ -555,8 +579,17 @@ def parse_ifd(data: bytes | memoryview, offset: int,

entries[tag] = IFDEntry(tag=tag, type_id=type_id, count=count, value=value)

# Next IFD offset
next_offset_pos = entry_offset + num_entries * entry_size
# Next IFD offset. Bounds-check before unpack so a truncated file
# raises ValueError rather than struct.error. `next_offset_pos`
# inherits sign from `offset` via `entry_table_end`, so reject any
# negative position before `struct.unpack_from` can wrap around.
next_offset_pos = entry_table_end
next_offset_size = 8 if is_big else 4
if next_offset_pos < 0 or next_offset_pos + next_offset_size > data_len:
raise ValueError(
f"IFD next-IFD pointer at offset {next_offset_pos} needs "
f"{next_offset_size} bytes but file length is {data_len}"
)
if is_big:
next_ifd = struct.unpack_from(f'{bo}Q', data, next_offset_pos)[0]
else:
Expand Down
89 changes: 89 additions & 0 deletions xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,92 @@ def test_regression_empty_sample_format_tuple_does_not_indexerror():
assert isinstance(da, xr.DataArray)
except ValueError:
pass


# --- Group 4: truncation fuzz on the IFD entry table (#1672) ---
#
# The byte-mutation fuzz in Group 3 flips one byte; it doesn't cover the
# case where the file is truncated before `parse_ifd` has even finished
# reading the entry table. The S2 typed-error work in #1661 fixed the
# *value area* of each entry; the entry table itself (num_entries,
# tag/type/count fields, next-IFD pointer) was still unguarded and
# escaped with `struct.error` on truncation.

from xrspatial.geotiff._header import parse_all_ifds, parse_header # noqa: E402


# Reuse the same corpus from Group 3; truncating each one covers
# little- and big-endian, strip and tile, with and without geo tags.
@pytest.mark.parametrize(
    "label,base_tiff",
    _CORPUS,
    ids=[name for name, _ in _CORPUS],
)
@given(offset_frac=st.floats(min_value=0.0, max_value=1.0))
@settings(
    max_examples=80,
    deadline=None,
    suppress_health_check=[
        HealthCheck.too_slow,
        HealthCheck.function_scoped_fixture,
    ],
)
def test_truncation_typed_errors_only(label, base_tiff, offset_frac):
    """Parse every prefix of a corpus TIFF and accept only typed failures.

    ``offset_frac`` is scaled by the length of ``base_tiff`` to pick the
    cut point, and the resulting prefix is fed through ``parse_header``
    followed by ``parse_all_ifds``.

    Accepted outcomes:
      * parsing succeeds,
      * an exception listed in ``ALLOWED_PARSE_EXCEPTIONS`` is raised,
      * ``MemoryError`` or ``OverflowError`` is raised.

    Anything else fails the test. ``struct.error`` gets its own failure
    message because an unpack running off the end of the buffer is the
    specific regression being guarded against.
    """
    cut = int(offset_frac * len(base_tiff))
    clipped = base_tiff[:cut]

    try:
        parse_all_ifds(clipped, parse_header(clipped))
    except ALLOWED_PARSE_EXCEPTIONS:
        # Typed parse error: the documented contract held.
        pass
    except (MemoryError, OverflowError):
        # Memory/overflow refusals are tolerated as well.
        pass
    except struct.error as exc:
        # Handled ahead of the generic clause so the report names the
        # failure mode explicitly.
        pytest.fail(
            f"[{label}] truncation to {cut} bytes (of {len(base_tiff)}) "
            f"raised struct.error: {exc!r}"
        )
    except Exception as exc:
        pytest.fail(
            f"[{label}] truncation to {cut} bytes (of {len(base_tiff)}) "
            f"raised non-typed {type(exc).__name__}: {exc!r}"
        )


# Targeted examples for each of the three #1672 bounds checks. These
# pin the regression for the exact offsets the fuzz sweeps over and
# give a fast-fail signal if any check is reverted.

@example(byte_count=8)  # Cuts before num_entries.
@example(byte_count=9)  # Splits the 2-byte num_entries field.
@example(byte_count=20)  # Splits the first entry's tag/type/count.
@given(byte_count=st.integers(min_value=0, max_value=400))
@settings(
    max_examples=40,
    deadline=None,
    suppress_health_check=[
        HealthCheck.too_slow,
        HealthCheck.function_scoped_fixture,
    ],
)
def test_truncation_in_entry_table_is_valueerror(byte_count):
    """Keep the first ``byte_count`` bytes of a minimal TIFF and parse it.

    The base file comes from ``make_minimal_tiff(4, 4, float32)``. The
    prefix is passed through ``parse_header`` and ``parse_all_ifds``.
    The test passes when parsing succeeds or raises an exception from
    ``ALLOWED_PARSE_EXCEPTIONS``, ``MemoryError`` or ``OverflowError``.
    It fails on ``struct.error`` or any other exception type.
    """
    clipped = make_minimal_tiff(4, 4, np.dtype('float32'))[:byte_count]

    try:
        parse_all_ifds(clipped, parse_header(clipped))
    except ALLOWED_PARSE_EXCEPTIONS:
        # Typed parse error: acceptable.
        pass
    except (MemoryError, OverflowError):
        # Memory/overflow refusals are acceptable too.
        pass
    except struct.error as exc:
        # Reported separately from the catch-all so a reverted bounds
        # check is immediately recognisable in the failure output.
        pytest.fail(
            f"truncation to {byte_count} bytes raised struct.error: {exc!r}"
        )
    except Exception as exc:
        pytest.fail(
            f"truncation to {byte_count} bytes raised non-typed "
            f"{type(exc).__name__}: {exc!r}"
        )
Loading
Loading