xarray-contrib · brendancol · May 12, 2026 · May 12, 2026 · May 12, 2026
diff --git a/xrspatial/geotiff/_header.py b/xrspatial/geotiff/_header.py
@@ -475,6 +475,19 @@ def parse_ifd(data: bytes | memoryview, offset: int,
     """
     bo = header.byte_order
     is_big = header.is_bigtiff
+    data_len = len(data)
+
+    # Bounds-check the num_entries field before unpacking. A truncated
+    # or crafted file used to escape as `struct.error` here, which is
+    # outside the documented ValueError contract. Negative offsets must
+    # also be rejected because `struct.unpack_from` interprets them with
+    # Python's negative-index semantics (reading from the buffer end).
+    num_entries_size = 8 if is_big else 2
+    if offset < 0 or offset + num_entries_size > data_len:
+        raise ValueError(
+            f"IFD num_entries at offset {offset} needs "
+            f"{num_entries_size} bytes but file length is {data_len}"
+        )
 
     if is_big:
         num_entries = struct.unpack_from(f'{bo}Q', data, offset)[0]
@@ -485,6 +498,18 @@ def parse_ifd(data: bytes | memoryview, offset: int,
         entry_offset = offset + 2
         entry_size = 12
 
+    # Bounds-check the entry table itself. Each entry is `entry_size`
+    # bytes; without this guard a short buffer would hit `struct.error`
+    # on the first unpack inside the loop below. `entry_offset` is
+    # derived from `offset` and inherits its sign; reject negatives so
+    # `struct.unpack_from` cannot index from the buffer end.
+    entry_table_end = entry_offset + num_entries * entry_size
+    if entry_offset < 0 or entry_table_end > data_len:
+        raise ValueError(
+            f"IFD entry table [{entry_offset}, {entry_table_end}) for "
+            f"num_entries={num_entries} exceeds file length {data_len}"
+        )
+
     inline_max = 8 if is_big else 4
     entries = {}
 
@@ -497,7 +522,6 @@ def parse_ifd(data: bytes | memoryview, offset: int,
         TAG_TILE_BYTE_COUNTS,
         TAG_COLORMAP,
     }
-    data_len = len(data)
 
     for i in range(num_entries):
         eo = entry_offset + i * entry_size
@@ -555,8 +579,17 @@ def parse_ifd(data: bytes | memoryview, offset: int,
 
         entries[tag] = IFDEntry(tag=tag, type_id=type_id, count=count, value=value)
 
-    # Next IFD offset
-    next_offset_pos = entry_offset + num_entries * entry_size
+    # Next IFD offset. Bounds-check before unpack so a truncated file
+    # raises ValueError rather than struct.error. `next_offset_pos`
+    # inherits sign from `offset` via `entry_table_end`, so reject any
+    # negative position before `struct.unpack_from` can wrap around.
+    next_offset_pos = entry_table_end
+    next_offset_size = 8 if is_big else 4
+    if next_offset_pos < 0 or next_offset_pos + next_offset_size > data_len:
+        raise ValueError(
+            f"IFD next-IFD pointer at offset {next_offset_pos} needs "
+            f"{next_offset_size} bytes but file length is {data_len}"
+        )
     if is_big:
         next_ifd = struct.unpack_from(f'{bo}Q', data, next_offset_pos)[0]
     else:

diff --git a/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py b/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py
@@ -321,3 +321,92 @@ def test_regression_empty_sample_format_tuple_does_not_indexerror():
         assert isinstance(da, xr.DataArray)
     except ValueError:
         pass
+
+
+# --- Group 4: truncation fuzz on the IFD entry table (#1672) ---
+#
+# The byte-mutation fuzz in Group 3 flips one byte; it doesn't cover the
+# case where the file is truncated before `parse_ifd` has even finished
+# reading the entry table. The S2 typed-error work in #1661 fixed the
+# *value area* of each entry; the entry table itself (num_entries,
+# tag/type/count fields, next-IFD pointer) was still unguarded and
+# escaped with `struct.error` on truncation.
+
+from xrspatial.geotiff._header import parse_all_ifds, parse_header  # noqa: E402
+
+
+# Reuse the same corpus from Group 3; truncating each one covers
+# little- and big-endian, strip and tile, with and without geo tags.
+@pytest.mark.parametrize(
+    "label,base_tiff", _CORPUS, ids=[lab for lab, _ in _CORPUS]
+)
+@given(offset_frac=st.floats(min_value=0.0, max_value=1.0))
+@settings(
+    max_examples=80,
+    deadline=None,
+    suppress_health_check=[HealthCheck.too_slow, HealthCheck.function_scoped_fixture],
+)
+def test_truncation_typed_errors_only(label, base_tiff, offset_frac):
+    """Truncating a valid TIFF at any byte must never raise an untyped error.
+
+    For every truncation point the parser must either succeed (the cut
+    landed past the IFD chain), raise ValueError / TypeError, or raise
+    one of the documented memory/overflow refusals. `struct.error` from
+    `unpack_from` walking off the buffer is the failure mode we are
+    guarding against.
+    """
+    cut = int(offset_frac * len(base_tiff))
+    truncated = base_tiff[:cut]
+
+    try:
+        header = parse_header(truncated)
+        parse_all_ifds(truncated, header)
+    except ALLOWED_PARSE_EXCEPTIONS:
+        return
+    except (MemoryError, OverflowError):
+        return
+    except struct.error as exc:
+        pytest.fail(
+            f"[{label}] truncation to {cut} bytes (of {len(base_tiff)}) "
+            f"raised struct.error: {exc!r}"
+        )
+    except Exception as exc:
+        pytest.fail(
+            f"[{label}] truncation to {cut} bytes (of {len(base_tiff)}) "
+            f"raised non-typed {type(exc).__name__}: {exc!r}"
+        )
+
+
+# Targeted examples for the #1672 num_entries and entry-table bounds
+# checks (the next-IFD pointer check relies on the random sweep). They
+# pin the exact offsets and fail fast if either check is reverted.
+
+@example(byte_count=8)    # Cuts before num_entries.
+@example(byte_count=9)    # Splits the 2-byte num_entries field.
+@example(byte_count=20)   # Splits the first 12-byte entry (its value field).
+@given(byte_count=st.integers(min_value=0, max_value=400))
+@settings(
+    max_examples=40,
+    deadline=None,
+    suppress_health_check=[HealthCheck.too_slow, HealthCheck.function_scoped_fixture],
+)
+def test_truncation_in_entry_table_is_valueerror(byte_count):
+    """Truncation in or near the IFD entry table raises only typed errors."""
+    base = make_minimal_tiff(4, 4, np.dtype('float32'))
+    truncated = base[:byte_count]
+    try:
+        header = parse_header(truncated)
+        parse_all_ifds(truncated, header)
+    except ALLOWED_PARSE_EXCEPTIONS:
+        return
+    except (MemoryError, OverflowError):
+        return
+    except struct.error as exc:
+        pytest.fail(
+            f"truncation to {byte_count} bytes raised struct.error: {exc!r}"
+        )
+    except Exception as exc:
+        pytest.fail(
+            f"truncation to {byte_count} bytes raised non-typed "
+            f"{type(exc).__name__}: {exc!r}"
+        )