From f1f8494702c937a5eef69bca664c1e32261728de Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Wed, 6 May 2026 06:02:57 -0700
Subject: [PATCH 1/3] Decode TIFF predictor=3 un-transpose by file byte order
 so big-endian floating-point TIFFs read exactly

The floating-point predictor (TIFF Tech Note 3) byte-swizzles each row
into bytes_per_sample lanes with the most significant byte in lane 0.
The decoder un-transposed those lanes assuming the output was always
little-endian: it placed the MSB at byte index bps-1 of each output
sample. Big-endian predictor=3 files therefore decoded into samples
with the bytes in the wrong order; the result still came back as a
clean float32/float64 array, just with garbage values.

The fix threads the file's byte order into the un-transpose step on
both CPU and GPU paths. For big-endian files the MSB now lands at byte
index 0 of each output sample. The little-endian path is unchanged.

The GPU output also needed a final byteswap when the file was
big-endian: the GPU decode keeps bytes in the file's order through
predictor decode and tile assembly, but cupy interprets the resulting
view as native (little-endian) byte order.

New tests (test_predictor3_big_endian.py) decode tifffile/imagecodecs-
encoded big-endian predictor=3 float32 and float64 files (strip and
tiled layouts, plus a GPU parity check) and confirm the existing
little-endian round-trip still works.
---
 CHANGELOG.md                                  |  6 ++
 xrspatial/geotiff/__init__.py                 |  2 +
 xrspatial/geotiff/_compression.py             | 42 +++++++--
 xrspatial/geotiff/_gpu_decode.py              | 63 +++++++++----
 xrspatial/geotiff/_reader.py                  | 14 +--
 .../tests/test_predictor3_big_endian.py       | 92 +++++++++++++++++++
 6 files changed, 187 insertions(+), 32 deletions(-)
 create mode 100644 xrspatial/geotiff/tests/test_predictor3_big_endian.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0cc2b4ec..2d3805ad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,12 @@
 -----------
 
 
+### Unreleased
+
+#### Bug fixes and improvements
+- Decode TIFF predictor=3 un-transpose by file byte order so big-endian floating-point TIFFs read back exactly
+
+
 ### Version 0.9.9 - 2026-05-05
 
 #### Bug fixes and improvements
diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py
index 95b1ce15..c741ac20 100644
--- a/xrspatial/geotiff/__init__.py
+++ b/xrspatial/geotiff/__init__.py
@@ -1394,6 +1394,7 @@ def read_geotiff_gpu(source: str, *,
             source, offsets, byte_counts,
             tw, th, width, height,
             compression, predictor, file_dtype, samples,
+            byte_order=header.byte_order,
         )
     except Exception:
         pass
@@ -1416,6 +1417,7 @@ def read_geotiff_gpu(source: str, *,
                 compressed_tiles,
                 tw, th, width, height,
                 compression, predictor, file_dtype, samples,
+                byte_order=header.byte_order,
             )
         except (ValueError, Exception):
             # Unsupported compression -- fall back to CPU then transfer
diff --git a/xrspatial/geotiff/_compression.py b/xrspatial/geotiff/_compression.py
index 087ebbe9..19a43a49 100644
--- a/xrspatial/geotiff/_compression.py
+++ b/xrspatial/geotiff/_compression.py
@@ -425,10 +425,18 @@ def predictor_encode(data: np.ndarray, width: int, height: int,
 # Decode: undo differencing, then un-transpose (lane b -> native byte bps-1-b).
 
 @ngjit
-def _fp_predictor_decode_row(row_data, width, bps):
+def _fp_predictor_decode_row(row_data, width, bps, big_endian):
     """Undo floating-point predictor for one row (in-place).
 
-    row_data: uint8 array of length width * bps
+    row_data: uint8 array of length width * bps.
+
+    Per TIFF Tech Note 3, lane 0 contains the most significant byte of
+    each sample regardless of the file's byte order.  After
+    un-transposing, the MSB has to land at the byte position that
+    corresponds to "most significant" *in the file's byte order*: index 0
+    for big-endian files and index ``bps-1`` for little-endian files.
+    The previous implementation always wrote MSB at index ``bps-1``,
+    which scrambled big-endian predictor=3 reads.
     """
     n = width * bps
 
@@ -436,26 +444,35 @@ def _fp_predictor_decode_row(row_data, width, bps):
     for i in range(1, n):
         row_data[i] = np.uint8((np.int32(row_data[i]) + np.int32(row_data[i - 1])) & 0xFF)
 
-    # Step 2: un-transpose bytes back to native sample order
+    # Step 2: un-transpose bytes back to the file's native sample order
     tmp = np.empty(n, dtype=np.uint8)
-    for sample in range(width):
-        for b in range(bps):
-            tmp[sample * bps + b] = row_data[(bps - 1 - b) * width + sample]
+    if big_endian:
+        # MSB lane (lane 0) -> byte index 0 in each output sample.
+        for sample in range(width):
+            for b in range(bps):
+                tmp[sample * bps + b] = row_data[b * width + sample]
+    else:
+        # MSB lane -> byte index ``bps-1`` (the high-order byte in LE).
+        for sample in range(width):
+            for b in range(bps):
+                tmp[sample * bps + b] = row_data[(bps - 1 - b) * width + sample]
     for i in range(n):
         row_data[i] = tmp[i]
 
 
 @ngjit
-def _fp_predictor_decode_rows(data, width, height, bps):
+def _fp_predictor_decode_rows(data, width, height, bps, big_endian):
     """Dispatch per-row decode from Numba, avoiding Python loop overhead."""
     row_len = width * bps
     for row in range(height):
         start = row * row_len
-        _fp_predictor_decode_row(data[start:start + row_len], width, bps)
+        _fp_predictor_decode_row(
+            data[start:start + row_len], width, bps, big_endian)
 
 
 def fp_predictor_decode(data: np.ndarray, width: int, height: int,
-                        bytes_per_sample: int) -> np.ndarray:
+                        bytes_per_sample: int,
+                        big_endian: bool = False) -> np.ndarray:
     """Undo floating-point predictor (predictor=3).
 
     Parameters
@@ -466,6 +483,10 @@ def fp_predictor_decode(data: np.ndarray, width: int, height: int,
         Tile/strip dimensions.
     bytes_per_sample : int
         Bytes per sample (e.g. 4 for float32, 8 for float64).
+    big_endian : bool
+        Whether the source file is big-endian (BOM ``MM``).  The
+        un-transpose puts the MSB lane at byte index 0 of each output
+        sample for BE files, and at byte index ``bps-1`` for LE files.
 
     Returns
     -------
@@ -473,7 +494,8 @@ def fp_predictor_decode(data: np.ndarray, width: int, height: int,
         Corrected array.
     """
     buf = np.ascontiguousarray(data)
-    _fp_predictor_decode_rows(buf, width, height, bytes_per_sample)
+    _fp_predictor_decode_rows(buf, width, height, bytes_per_sample,
+                              big_endian)
     return buf
 
 
diff --git a/xrspatial/geotiff/_gpu_decode.py b/xrspatial/geotiff/_gpu_decode.py
index 67c36829..5305b83b 100644
--- a/xrspatial/geotiff/_gpu_decode.py
+++ b/xrspatial/geotiff/_gpu_decode.py
@@ -644,11 +644,14 @@ def _predictor_decode_kernel(data, width, height, bytes_per_sample):
 
 
 @cuda.jit
-def _fp_predictor_decode_kernel(data, tmp, width, height, bps):
+def _fp_predictor_decode_kernel(data, tmp, width, height, bps, big_endian):
     """Undo floating-point predictor (predictor=3), one thread per row.
 
     data: flat uint8 device array
     tmp: scratch buffer, same size as data
+    big_endian: when True, place the MSB lane at byte index 0 of each
+        output sample (file is big-endian); when False, place it at
+        byte index ``bps-1`` (file is little-endian).
     """
     row = cuda.grid(1)
     if row >= height:
@@ -663,10 +666,17 @@ def _fp_predictor_decode_kernel(data, tmp, width, height, bps):
         data[idx] = numba_uint8(
             (numba_int32(data[idx]) + numba_int32(data[idx - 1])) & 0xFF)
 
-    # Step 2: un-transpose byte lanes (MSB-first) back to native order
-    for sample in range(width):
-        for b in range(bps):
-            tmp[start + sample * bps + b] = data[start + (bps - 1 - b) * width + sample]
+    # Step 2: un-transpose byte lanes back to the file's native sample
+    # order.  Lane 0 always contains the MSB byte (TIFF Tech Note 3); the
+    # MSB lands at byte index 0 (BE) or bps-1 (LE) of each output sample.
+    if big_endian:
+        for sample in range(width):
+            for b in range(bps):
+                tmp[start + sample * bps + b] = data[start + b * width + sample]
+    else:
+        for sample in range(width):
+            for b in range(bps):
+                tmp[start + sample * bps + b] = data[start + (bps - 1 - b) * width + sample]
 
     # Copy back
     for i in range(row_len):
@@ -1259,6 +1269,7 @@ def gpu_decode_tiles_from_file(
     predictor: int,
     dtype: np.dtype,
     samples: int = 1,
+    byte_order: str = '<',
 ):
     """Decode tiles from a file, using GDS if available.
 
@@ -1289,7 +1300,8 @@ def gpu_decode_tiles_from_file(
                 return _apply_predictor_and_assemble(
                     d_decomp, d_decomp_offsets, len(d_tiles),
                     tile_width, tile_height, image_width, image_height,
-                    predictor, dtype, samples, tile_bytes)
+                    predictor, dtype, samples, tile_bytes,
+                    byte_order=byte_order)
 
         # GDS read succeeded but nvCOMP can't decompress on GPU,
         # or it's LZW/deflate. Copy tiles to host and use normal path.
@@ -1301,7 +1313,8 @@ def gpu_decode_tiles_from_file(
 
     return gpu_decode_tiles(
         compressed_tiles, tile_width, tile_height,
-        image_width, image_height, compression, predictor, dtype, samples)
+        image_width, image_height, compression, predictor, dtype, samples,
+        byte_order=byte_order)
 
 
 def _try_nvcomp_from_device_bufs(d_tiles, tile_bytes, compression):
@@ -1373,11 +1386,13 @@ class _NvcompDecompOpts(ctypes.Structure):
 def _apply_predictor_and_assemble(d_decomp, d_decomp_offsets, n_tiles,
                                     tile_width, tile_height,
                                     image_width, image_height,
-                                    predictor, dtype, samples, tile_bytes):
+                                    predictor, dtype, samples, tile_bytes,
+                                    byte_order: str = '<'):
     """Apply predictor decode and tile assembly on GPU."""
     import cupy
 
     bytes_per_pixel = dtype.itemsize * samples
+    big_endian = (byte_order == '>')
 
     if predictor == 2:
         total_rows = n_tiles * tile_height
@@ -1395,7 +1410,8 @@ def _apply_predictor_and_assemble(d_decomp, d_decomp_offsets, n_tiles,
         bpg = math.ceil(total_rows / tpb)
         d_tmp = cupy.empty_like(d_decomp)
         _fp_predictor_decode_kernel[bpg, tpb](
-            d_decomp, d_tmp, tile_width * samples, total_rows, dtype.itemsize)
+            d_decomp, d_tmp, tile_width * samples, total_rows,
+            dtype.itemsize, big_endian)
         cuda.synchronize()
 
     tiles_across = math.ceil(image_width / tile_width)
@@ -1415,10 +1431,15 @@ def _apply_predictor_and_assemble(d_decomp, d_decomp_offsets, n_tiles,
     cuda.synchronize()
 
     if samples > 1:
-        return d_output.view(dtype=cupy.dtype(dtype)).reshape(
+        out = d_output.view(dtype=cupy.dtype(dtype)).reshape(
             image_height, image_width, samples)
-    return d_output.view(dtype=cupy.dtype(dtype)).reshape(
-        image_height, image_width)
+    else:
+        out = d_output.view(dtype=cupy.dtype(dtype)).reshape(
+            image_height, image_width)
+    if big_endian and dtype.itemsize > 1:
+        # See gpu_decode_tiles for why BE samples need a final byteswap.
+        out = out.byteswap()
+    return out
 
 
 def gpu_decode_tiles(
@@ -1431,6 +1452,7 @@ def gpu_decode_tiles(
     predictor: int,
     dtype: np.dtype,
     samples: int = 1,
+    byte_order: str = '<',
 ):
     """Decode and assemble TIFF tiles entirely on GPU.
 
@@ -1647,7 +1669,8 @@ def gpu_decode_tiles(
         bpg = math.ceil(total_rows / tpb)
         d_tmp = cupy.empty_like(d_decomp)
         _fp_predictor_decode_kernel[bpg, tpb](
-            d_decomp, d_tmp, tile_width * samples, total_rows, dtype.itemsize)
+            d_decomp, d_tmp, tile_width * samples, total_rows,
+            dtype.itemsize, byte_order == '>')
         cuda.synchronize()
 
     # Assemble tiles into output image on GPU
@@ -1669,10 +1692,18 @@ def gpu_decode_tiles(
 
     # Reshape to image
     if samples > 1:
-        return d_output.view(dtype=cupy.dtype(dtype)).reshape(
+        out = d_output.view(dtype=cupy.dtype(dtype)).reshape(
             image_height, image_width, samples)
-    return d_output.view(dtype=cupy.dtype(dtype)).reshape(
-        image_height, image_width)
+    else:
+        out = d_output.view(dtype=cupy.dtype(dtype)).reshape(
+            image_height, image_width)
+    # The decoded byte stream is in the file's byte order; cupy view
+    # interprets it as native (always little-endian on supported GPUs),
+    # so big-endian samples that are wider than a byte must be swapped
+    # back to native before the values mean anything.
+    if byte_order == '>' and dtype.itemsize > 1:
+        out = out.byteswap()
+    return out
 
 
 # ---------------------------------------------------------------------------
diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py
index feb59574..04504b1b 100644
--- a/xrspatial/geotiff/_reader.py
+++ b/xrspatial/geotiff/_reader.py
@@ -341,7 +341,7 @@ def _open_source(source: str):
 
 def _apply_predictor(chunk: np.ndarray, pred: int, width: int,
                      height: int, bytes_per_sample: int,
-                     samples: int = 1) -> np.ndarray:
+                     samples: int = 1, byte_order: str = '<') -> np.ndarray:
     """Apply the appropriate predictor decode to decompressed data.
 
     ``width``, ``height``, ``bytes_per_sample``, and ``samples`` describe
@@ -353,16 +353,17 @@ def _apply_predictor(chunk: np.ndarray, pred: int, width: int,
 
     Predictor=3 (floating-point) byte-swizzles each row into
     ``bytes_per_sample`` interleaved lanes of length ``width * samples``,
-    per TIFF Technical Note 3.  Passing ``bytes_per_sample * samples`` as
-    the lane count (the pre-fix behaviour) swizzles over the wrong lane
-    count and scrambles multi-band pixel values.
+    per TIFF Technical Note 3.  The un-transpose stage has to put the
+    MSB lane at the file's high-order byte position, which differs for
+    big- vs little-endian files; ``byte_order`` carries that.
     """
     if pred == 2:
         return predictor_decode(chunk, width, height,
                                 bytes_per_sample * samples)
     elif pred == 3:
         return fp_predictor_decode(chunk, width * samples, height,
-                                   bytes_per_sample)
+                                   bytes_per_sample,
+                                   big_endian=(byte_order == '>'))
     return chunk
 
 
@@ -413,7 +414,8 @@ def _decode_strip_or_tile(data_slice, compression, width, height, samples,
         if not chunk.flags.writeable:
             chunk = chunk.copy()
         chunk = _apply_predictor(chunk, pred, width, height,
-                                 bytes_per_sample, samples=samples)
+                                 bytes_per_sample, samples=samples,
+                                 byte_order=byte_order)
 
     if is_sub_byte:
         pixels = unpack_bits(chunk, bps, pixel_count)
diff --git a/xrspatial/geotiff/tests/test_predictor3_big_endian.py b/xrspatial/geotiff/tests/test_predictor3_big_endian.py
new file mode 100644
index 00000000..aec4c093
--- /dev/null
+++ b/xrspatial/geotiff/tests/test_predictor3_big_endian.py
@@ -0,0 +1,92 @@
+"""Predictor=3 big-endian round-trip tests.
+
+The floating-point predictor (TIFF Tech Note 3) byte-swizzles each row
+into ``bytes_per_sample`` lanes, MSB-first.  The decoder un-transposes
+those lanes back to the file's native byte order; the byte position of
+the MSB differs between BE (index 0) and LE (index ``bps-1``).
+
+Before the fix the un-transpose always wrote MSB at index ``bps-1``, so
+big-endian predictor=3 files decoded to garbage values even though they
+came back as a clean ``float32``/``float64`` array (no error, just wrong
+numbers - the worst kind of bug for raster data).
+"""
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from xrspatial.geotiff._reader import read_to_array
+
+tifffile = pytest.importorskip("tifffile")
+pytest.importorskip("imagecodecs")  # tifffile needs it for predictor=3
+
+
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+def test_big_endian_predictor3_round_trip(tmp_path, dtype):
+    """xrspatial reads a big-endian predictor=3 file exactly."""
+    arr = np.array(
+        [
+            [1.0, 2.0, 3.0, 4.0],
+            [5.0, 6.0, 7.0, 8.0],
+            [1.5, 2.5, 3.5, 4.5],
+            [10.0, 20.0, 30.0, 40.0],
+        ],
+        dtype=dtype,
+    )
+    path = tmp_path / f"be_pred3_{np.dtype(dtype).name}.tif"
+    tifffile.imwrite(
+        str(path), arr, byteorder=">", predictor=3, compression="deflate")
+
+    out, _ = read_to_array(str(path))
+    np.testing.assert_array_equal(out, arr)
+
+
+def test_little_endian_predictor3_still_round_trips(tmp_path):
+    """Sanity: the BE fix did not break the LE path."""
+    arr = np.linspace(-100.0, 100.0, 64, dtype=np.float32).reshape(8, 8)
+    path = tmp_path / "le_pred3_f32.tif"
+    tifffile.imwrite(
+        str(path), arr, byteorder="<", predictor=3, compression="deflate")
+
+    out, _ = read_to_array(str(path))
+    np.testing.assert_array_equal(out, arr)
+
+
+def test_big_endian_predictor3_tiled(tmp_path):
+    """BE predictor=3 with tiled layout, multiple tiles per row."""
+    rng = np.random.RandomState(20260506)
+    arr = rng.standard_normal((32, 48)).astype(np.float32)
+    path = tmp_path / "be_pred3_tiled.tif"
+    tifffile.imwrite(
+        str(path), arr, byteorder=">", predictor=3,
+        compression="deflate", tile=(16, 16))
+
+    out, _ = read_to_array(str(path))
+    np.testing.assert_array_equal(out, arr)
+
+
+def test_big_endian_predictor3_gpu(tmp_path):
+    """The GPU decode path also handles big-endian predictor=3."""
+    cupy = pytest.importorskip("cupy")
+    if not cupy.cuda.is_available():
+        pytest.skip("CUDA not available")
+
+    from xrspatial.geotiff import open_geotiff
+
+    rng = np.random.RandomState(20260506)
+    arr = rng.standard_normal((32, 48)).astype(np.float32)
+    path = tmp_path / "be_pred3_gpu.tif"
+    tifffile.imwrite(
+        str(path), arr, byteorder=">", predictor=3,
+        compression="deflate", tile=(16, 16))
+
+    cpu = open_geotiff(str(path)).values
+    np.testing.assert_array_equal(cpu, arr)
+
+    gpu_da = open_geotiff(str(path), gpu=True)
+    gpu_arr = gpu_da.data
+    if hasattr(gpu_arr, "get"):
+        gpu_arr = gpu_arr.get()
+    else:
+        gpu_arr = np.asarray(gpu_arr)
+    np.testing.assert_array_equal(gpu_arr, arr)

From 9180fb3462607a094ba25a2c949d70ea0c396041 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Wed, 6 May 2026 06:03:45 -0700
Subject: [PATCH 2/3] Record geotiff predictor sweep findings in accuracy state

---
 .claude/sweep-accuracy-state.csv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.claude/sweep-accuracy-state.csv b/.claude/sweep-accuracy-state.csv
index f4b17e1e..0af0a12a 100644
--- a/.claude/sweep-accuracy-state.csv
+++ b/.claude/sweep-accuracy-state.csv
@@ -12,7 +12,7 @@ emerging_hotspots,2026-04-30,,MEDIUM,2;3,MEDIUM: threshold_90 uses int() (trunca
 fire,2026-04-30,,,,All ops per-pixel (no accumulation/stencil/projected distance). NaN handled via x!=x; CUDA bounds use strict <; rdnbr and ros divisions guarded; CPU/GPU/dask paths algorithmically identical. No accuracy issues found.
 flood,2026-04-30,,MEDIUM,2;5,"MEDIUM (not fixed): dask backend preserves float32 input dtype while numpy promotes to float64 in flood_depth and curve_number_runoff; DataArray inputs for curve_number, mannings_n bypass scalar > 0 (and CN <= 100) range validation, silently producing NaN/garbage."
 focal,2026-03-30T13:00:00Z,1092,,,
-geotiff,2026-04-23,1247,HIGH,1;3;4;5,HIGH fixed: CPU fp_predictor_decode wrong byte-lane layout for multi-sample predictor=3 (GPU was correct). MEDIUMs also fixed on same PR: eager and streaming writers emit LONG8 strip/tile offsets in BigTIFF output (were LONG); VRT read honors AREA_OR_POINT=Point; VRT nodata cast uses source dtype instead of float32. LOW fixed: duplicate LERC codec block removed from _compression.py.
+geotiff,2026-05-06,1499;1500,CRITICAL,1;3,"Pass 3 (2026-05-06): CRITICAL fixed PR #1499 - predictor=2 was byte-wise at sample stride, loses carries inside multi-byte samples; libtiff/GDAL-written uint16/int16/uint32/int32 deflate files silently decoded to wrong values. Now sample-wise per TIFF spec, with file-byte-order dtype view; per-channel for chunky multi-band; CPU and GPU. HIGH fixed PR #1500 - predictor=3 un-transpose always wrote MSB at byte index bps-1 so big-endian float TIFFs decoded to garbage; now byte-order aware; GPU output also gets a final byteswap for BE files. | HIGH fixed: CPU fp_predictor_decode wrong byte-lane layout for multi-sample predictor=3 (GPU was correct). MEDIUMs also fixed on same PR: eager and streaming writers emit LONG8 strip/tile offsets in BigTIFF output (were LONG); VRT read honors AREA_OR_POINT=Point; VRT nodata cast uses source dtype instead of float32. LOW fixed: duplicate LERC codec block removed from _compression.py."
 glcm,2026-05-01,1408,HIGH,2,"angle=None averaged NaN as 0, masking no-valid-pairs as zero texture; fixed via nanmean-style averaging"
 hillshade,2026-04-10T12:00:00Z,,,,"Horn's method correct. All backends consistent. NaN propagation correct. float32 adequate for [0,1] output."
 hydro,2026-04-30,,LOW,1,Only LOW: twi log(0)=-inf if fa=0 (out-of-contract); MFD weighted sum no Kahan (negligible). No CRIT/HIGH issues.

From c0657cfc8a41a22fa5db151b74895db76c3a3664 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Wed, 6 May 2026 06:06:25 -0700
Subject: [PATCH 3/3] Correct sweep state: #1499 was duplicate of #1498, only
 #1500 stands

The third-pass agent worked from a stale local main and re-derived the
predictor=2 sample-wise fix that already shipped in #1498. That PR has been
closed as a duplicate. Only the predictor=3 big-endian fix in #1500 is a
genuine new finding.
---
 .claude/sweep-accuracy-state.csv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.claude/sweep-accuracy-state.csv b/.claude/sweep-accuracy-state.csv
index 0af0a12a..81ec6b4d 100644
--- a/.claude/sweep-accuracy-state.csv
+++ b/.claude/sweep-accuracy-state.csv
@@ -12,7 +12,7 @@ emerging_hotspots,2026-04-30,,MEDIUM,2;3,MEDIUM: threshold_90 uses int() (trunca
 fire,2026-04-30,,,,All ops per-pixel (no accumulation/stencil/projected distance). NaN handled via x!=x; CUDA bounds use strict <; rdnbr and ros divisions guarded; CPU/GPU/dask paths algorithmically identical. No accuracy issues found.
 flood,2026-04-30,,MEDIUM,2;5,"MEDIUM (not fixed): dask backend preserves float32 input dtype while numpy promotes to float64 in flood_depth and curve_number_runoff; DataArray inputs for curve_number, mannings_n bypass scalar > 0 (and CN <= 100) range validation, silently producing NaN/garbage."
 focal,2026-03-30T13:00:00Z,1092,,,
-geotiff,2026-05-06,1499;1500,CRITICAL,1;3,"Pass 3 (2026-05-06): CRITICAL fixed PR #1499 - predictor=2 was byte-wise at sample stride, loses carries inside multi-byte samples; libtiff/GDAL-written uint16/int16/uint32/int32 deflate files silently decoded to wrong values. Now sample-wise per TIFF spec, with file-byte-order dtype view; per-channel for chunky multi-band; CPU and GPU. HIGH fixed PR #1500 - predictor=3 un-transpose always wrote MSB at byte index bps-1 so big-endian float TIFFs decoded to garbage; now byte-order aware; GPU output also gets a final byteswap for BE files. | HIGH fixed: CPU fp_predictor_decode wrong byte-lane layout for multi-sample predictor=3 (GPU was correct). MEDIUMs also fixed on same PR: eager and streaming writers emit LONG8 strip/tile offsets in BigTIFF output (were LONG); VRT read honors AREA_OR_POINT=Point; VRT nodata cast uses source dtype instead of float32. LOW fixed: duplicate LERC codec block removed from _compression.py."
+geotiff,2026-05-06,1500,HIGH,3,"Pass 3 (2026-05-06): HIGH fixed PR #1500 - predictor=3 un-transpose always wrote MSB at byte index bps-1 so big-endian float TIFFs decoded to garbage; now byte-order aware; GPU output also gets a final byteswap for BE files. (Pass 3 also opened #1499 for predictor=2 sample-wise decode but it was a duplicate of already-merged #1498 -- closed.) | Pass 2 (2026-05-05) PR #1498: predictor=2 was byte-wise at sample stride, multi-byte integer files written by libtiff/GDAL silently decoded to wrong values; now sample-wise per TIFF spec on CPU and GPU. | Pass 1 (2026-04-23) PR #1247: HIGH fixed CPU fp_predictor_decode wrong byte-lane layout for multi-sample predictor=3 (GPU was correct). MEDIUMs also fixed on same PR: BigTIFF writers emit LONG8 strip/tile offsets (were LONG); VRT read honors AREA_OR_POINT=Point; VRT nodata cast uses source dtype instead of float32. LOW fixed: duplicate LERC codec block removed from _compression.py."
 glcm,2026-05-01,1408,HIGH,2,"angle=None averaged NaN as 0, masking no-valid-pairs as zero texture; fixed via nanmean-style averaging"
 hillshade,2026-04-10T12:00:00Z,,,,"Horn's method correct. All backends consistent. NaN propagation correct. float32 adequate for [0,1] output."
 hydro,2026-04-30,,LOW,1,Only LOW: twi log(0)=-inf if fa=0 (out-of-contract); MFD weighted sum no Kahan (negligible). No CRIT/HIGH issues.