From 20dd59f116362d2e09e6137937e0a340a9cefe11 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sat, 9 May 2026 18:14:48 -0700 Subject: [PATCH 1/3] Update y/x coords for mirror-flip orientations (#1537) Orientations 2/3/4 in the TIFF Orientation tag (274) flip the array horizontally, both axes, or vertically. PR #1521 applied the buffer remap but left the y/x coord arrays computed from the un-flipped transform, so xarray label lookups against a georeferenced file returned the wrong pixel under any of those orientations. The fix updates the geo_info transform after the array remap so a displayed pixel and its geographic label point to the same location on the ground. PixelIsArea and PixelIsPoint both work; the unrotated transform is still preserved for ungeoreferenced files (coords stay on integer pixel indices). --- xrspatial/geotiff/_reader.py | 77 ++++++++-- xrspatial/geotiff/tests/test_orientation.py | 159 ++++++++++++++++++++ 2 files changed, 220 insertions(+), 16 deletions(-) diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index 6188a879..af2c7a27 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -21,7 +21,12 @@ unpack_bits, ) from ._dtypes import SUB_BYTE_BPS, resolve_bits_per_sample, tiff_dtype_to_numpy -from ._geotags import GeoInfo, GeoTransform, extract_geo_info +from ._geotags import ( + GeoInfo, + GeoTransform, + RASTER_PIXEL_IS_POINT, + extract_geo_info, +) from ._header import ( IFD, TIFFHeader, @@ -1541,23 +1546,63 @@ def read_to_array(source, *, window=None, overview_level: int | None = None, arr = arr[:, :, band] if orientation != 1: + # Use the *file* dimensions (before orientation) for the + # transform-flip math below. After ``_apply_orientation`` the + # array shape may swap (orientations 5-8), so capture them now. + file_h = arr.shape[0] + file_w = arr.shape[1] arr = _apply_orientation(arr, orientation) - # Orientations 5-8 swap rows and columns, so the file's stored - # pixel_width sits on the y-axis of the displayed array and - # vice versa. Swap the transform's pixel sizes so the coord - # arrays come out the right length. Signs are preserved - # rather than coerced to north-up, since some legitimate files - # use a non-standard sign convention (south-up, west-up). + # The pixel buffer was just remapped; the transform that maps + # display pixels back to geographic coordinates needs the + # matching remap or the y/x coords still describe the file's + # original layout. + # + # Orientations 2-4 are pure mirror flips: the array shape stays + # the same, but the displayed origin moves to the opposite + # edge along whichever axes were flipped. Update origin and + # sign of the affected pixel scale so xarray coords land on + # the right geographic positions. # - # For orientations 6/7/8 (rotations + flips, not a pure - # transpose) the swap is geometrically inexact for georef'd - # files: a strict implementation would also adjust origin - # and re-sign per axis. Such files are vanishingly rare in - # practice (TIFF Orientation 5-8 with a meaningful - # ModelTransformation), and getting it right requires a - # design pass; we warn instead so the user knows to verify. - if orientation in (5, 6, 7, 8): - t = geo_info.transform + # Orientations 5-8 swap rows and columns. Pixel sizes swap + # axes so coord array lengths match the new shape. Signs are + # preserved rather than coerced to north-up since some + # legitimate files use a non-standard sign convention + # (south-up, west-up). For 6/7/8 (rotations + flips, not a + # pure transpose) the swap is geometrically inexact for + # georef'd files: a strict implementation would also adjust + # origin and re-sign per axis. Those files are vanishingly + # rare in practice (TIFF Orientation 5-8 with a meaningful + # ModelTransformation); warn so the user knows to verify. + t = geo_info.transform + if t is not None and orientation in (2, 3, 4): + # PixelIsPoint tiepoints are at pixel centers, so the + # opposite-edge pixel sits ``(N-1) * step`` away. PixelIsArea + # tiepoints are at pixel edges, so the opposite edge is + # ``N * step`` away. The two cases collapse to a single + # formula below by switching the offset. + if geo_info.raster_type == RASTER_PIXEL_IS_POINT: + x_shift = file_w - 1 + y_shift = file_h - 1 + else: + x_shift = file_w + y_shift = file_h + new_origin_x = t.origin_x + new_origin_y = t.origin_y + new_px_w = t.pixel_width + new_px_h = t.pixel_height + if orientation in (2, 3): # x flipped + new_origin_x = t.origin_x + x_shift * t.pixel_width + new_px_w = -t.pixel_width + if orientation in (3, 4): # y flipped + new_origin_y = t.origin_y + y_shift * t.pixel_height + new_px_h = -t.pixel_height + geo_info.transform = GeoTransform( + origin_x=new_origin_x, + origin_y=new_origin_y, + pixel_width=new_px_w, + pixel_height=new_px_h, + ) + elif orientation in (5, 6, 7, 8): geo_info.transform = GeoTransform( origin_x=t.origin_x, origin_y=t.origin_y, diff --git a/xrspatial/geotiff/tests/test_orientation.py b/xrspatial/geotiff/tests/test_orientation.py index da693002..de2601f8 100644 --- a/xrspatial/geotiff/tests/test_orientation.py +++ b/xrspatial/geotiff/tests/test_orientation.py @@ -235,6 +235,165 @@ def test_orientation_5_to_8_warn_on_georef(tmp_path, orientation): ) +# --------------------------------------------------------------------------- +# Geographic coordinate updates for mirror-flip orientations (issue #1537) +# --------------------------------------------------------------------------- +# +# Orientations 2/3/4 flip the array horizontally, both axes, or vertically. +# The reader used to apply the buffer flip but leave the y/x coord arrays +# computed from the original transform, so xarray label-based lookups +# returned the wrong pixel for georeferenced files. + +_GEOREF_EXTRA_AREA = [ + (33550, 'd', 3, (1.0, 1.0, 0.0)), + (33922, 'd', 6, (0.0, 0.0, 0.0, 100.0, 50.0, 0.0)), + (34735, 'H', 12, ( + 1, 1, 0, 2, + 1024, 0, 1, 2, + 2048, 0, 1, 4326, + )), +] +_GEOREF_EXTRA_POINT = [ + (33550, 'd', 3, (1.0, 1.0, 0.0)), + (33922, 'd', 6, (0.0, 0.0, 0.0, 100.0, 50.0, 0.0)), + (34735, 'H', 16, ( + 1, 1, 0, 3, + 1024, 0, 1, 2, + 1025, 0, 1, 2, # PixelIsPoint + 2048, 0, 1, 4326, + )), +] + + +def _write_with_orient_and_georef(path, arr, orientation, raster_type='area'): + """Write *arr* with Orientation tag + EPSG:4326 georef.""" + extras = (_GEOREF_EXTRA_POINT if raster_type == 'point' + else _GEOREF_EXTRA_AREA) + tifffile.imwrite( + str(path), arr, + extratags=[(274, 'H', 1, orientation, True)] + [ + (tag, dt, count, val, True) for (tag, dt, count, val) in extras + ], + ) + + +@pytest.mark.parametrize('orientation', [2, 3, 4]) +def test_orient_2_3_4_coords_track_pixel_flip_area(tmp_path, orientation): + """Mirror-flip orientations: a label-based lookup at a fixed (x, y) + must return the same pixel value regardless of the file's orientation. + """ + arr = np.arange(24, dtype=np.uint8).reshape(4, 6) # h=4, w=6 + + # Reference: orientation=1 (no transform) tells us the geographic + # coords for each file pixel under PixelIsArea. Pick three corners + # so x-flip, y-flip, and combined flips each have a target. + ref_path = tmp_path / 'orient_ref.tif' + _write_with_orient_and_georef(ref_path, arr, 1) + ref = open_geotiff(str(ref_path)) + # Pixel (0, 0): top-left, value 0, at x=100.5, y=49.5 + # Pixel (0, 5): top-right, value 5, at x=105.5, y=49.5 + # Pixel (3, 0): bottom-left, value 18, at x=100.5, y=46.5 + # Pixel (3, 5): bottom-right, value 23, at x=105.5, y=46.5 + targets = [ + (100.5, 49.5, 0), + (105.5, 49.5, 5), + (100.5, 46.5, 18), + (105.5, 46.5, 23), + ] + for x, y, expected in targets: + assert int(ref.sel(x=x, y=y).item()) == expected + + # Now check the same coords under the flipped orientation. + path = tmp_path / f'orient_{orientation}.tif' + _write_with_orient_and_georef(path, arr, orientation) + da = open_geotiff(str(path)) + for x, y, expected in targets: + got = int(da.sel(x=x, y=y).item()) + assert got == expected, ( + f'orientation={orientation}: sel(x={x}, y={y}) returned {got}, ' + f'expected {expected} (mirror-flip lost the pixel-to-coord ' + f'binding)' + ) + + +@pytest.mark.parametrize('orientation', [2, 3, 4]) +def test_orient_2_3_4_coords_track_pixel_flip_point(tmp_path, orientation): + """Same coord-fidelity check for PixelIsPoint files.""" + arr = np.arange(24, dtype=np.uint8).reshape(4, 6) + + ref_path = tmp_path / 'orient_ref_pt.tif' + _write_with_orient_and_georef(ref_path, arr, 1, raster_type='point') + ref = open_geotiff(str(ref_path)) + # PixelIsPoint: pixel (0, 0) center = (100, 50), pixel (0, 5) = (105, 50) + # pixel (3, 0) = (100, 47), pixel (3, 5) = (105, 47) + targets = [ + (100.0, 50.0, 0), + (105.0, 50.0, 5), + (100.0, 47.0, 18), + (105.0, 47.0, 23), + ] + for x, y, expected in targets: + assert int(ref.sel(x=x, y=y).item()) == expected + + path = tmp_path / f'orient_{orientation}_pt.tif' + _write_with_orient_and_georef(path, arr, orientation, raster_type='point') + da = open_geotiff(str(path)) + for x, y, expected in targets: + got = int(da.sel(x=x, y=y).item()) + assert got == expected, ( + f'PixelIsPoint orient={orientation}: sel(x={x}, y={y})={got}, ' + f'expected {expected}' + ) + + +@pytest.mark.parametrize( + 'orientation,expected_first_x,expected_first_y', + [ + # x_first=100.5 means the leftmost displayed column carries the + # original left-edge geographic coordinate; 105.5 means it carries + # the original right-edge coordinate (i.e. the coord array got + # flipped along x). + (2, 105.5, 49.5), # x flipped, y unchanged + (3, 105.5, 46.5), # both flipped + (4, 100.5, 46.5), # x unchanged, y flipped + ], +) +def test_orient_2_3_4_coord_arrays( + tmp_path, orientation, expected_first_x, expected_first_y, +): + """The first y/x coord lands on the right edge after a flip.""" + arr = np.arange(24, dtype=np.uint8).reshape(4, 6) + path = tmp_path / f'orient_arr_{orientation}.tif' + _write_with_orient_and_georef(path, arr, orientation) + + da = open_geotiff(str(path)) + np.testing.assert_allclose(da.x[0].item(), expected_first_x) + np.testing.assert_allclose(da.y[0].item(), expected_first_y) + + +def test_orient_2_3_4_no_geo_still_uses_pixel_coords(tmp_path): + """Without georef, the legacy integer pixel coord behaviour is preserved. + + A file without ModelPixelScale / ModelTiepoint stays on integer + coords regardless of orientation; the fix must not start fabricating + coordinates for un-georeferenced files. + """ + arr = np.arange(24, dtype=np.uint8).reshape(4, 6) + for orientation in (2, 3, 4): + path = tmp_path / f'orient_no_geo_{orientation}.tif' + tifffile.imwrite( + str(path), arr, + extratags=[(274, 'H', 1, orientation, True)], + ) + da = open_geotiff(str(path)) + # Coords default to 0..N-1 when has_georef is False; the + # orientation transform fix must not change that. + assert da.x.values.dtype.kind in ('i', 'u'), ( + f'orient={orientation}: x coords drifted off integer ' + f'(dtype={da.x.values.dtype})' + ) + + def test_orientation_with_band_selection_returns_2d(tmp_path): """band= followed by an orientation transpose returns a 2D array. From 1516838b5d4496125ebf8f39b8ec7493651d96cf Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sat, 9 May 2026 18:23:39 -0700 Subject: [PATCH 2/3] Update sweep-accuracy-state for pass 9 --- .claude/sweep-accuracy-state.csv | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.claude/sweep-accuracy-state.csv b/.claude/sweep-accuracy-state.csv index 04e95240..1b22c248 100644 --- a/.claude/sweep-accuracy-state.csv +++ b/.claude/sweep-accuracy-state.csv @@ -1,19 +1,18 @@ module,last_inspected,issue,severity_max,categories_found,notes balanced_allocation,2026-04-14T12:00:00Z,1203,,,float32 allocation array caused source ID mismatch for non-integer IDs. Fix in PR #1205. -corridor,2026-05-01,,LOW,1,"LOW: corridor inherits float32 from cost_distance; for very large accumulated costs, normalized = corridor - corridor_min loses precision near min (intrinsic to upstream dtype, not corridor itself). NaN handling correct (skipna min, np.isfinite check before normalize). All 4 backends route through pure xarray arithmetic; threshold uses dask/cupy/numpy where with try/except dispatch. No CRIT/HIGH issues." bilateral,2026-05-01,,,,"No CRIT/HIGH/MEDIUM. Sigma underflow validated via sqrt(tiny) bound; oversize sigma clamped. float64 throughout numpy/cupy. NaN center returns NaN; NaN neighbors skipped (denom not incremented). w_sum>0 guard avoids div-by-zero. map_overlap depth==kernel radius. CUDA bounds correct. Inf input could yield 0*inf=NaN in v_sum but unvalidated input is general xrspatial pattern, not bilateral-specific." contour,2026-05-01,,,,"Marching squares correct: NaN check uses self-inequality, loop bounds (ny-1,nx-1) cover all quads, dask overlap depth=1 matches 2x2 stencil, float64 cast consistent across backends, saddle disambiguation via center value. No CRIT/HIGH issues; minor LOW (Inf inputs not specifically rejected) not flagged." +corridor,2026-05-01,,LOW,1,"LOW: corridor inherits float32 from cost_distance; for very large accumulated costs, normalized = corridor - corridor_min loses precision near min (intrinsic to upstream dtype, not corridor itself). NaN handling correct (skipna min, np.isfinite check before normalize). All 4 backends route through pure xarray arithmetic; threshold uses dask/cupy/numpy where with try/except dispatch. No CRIT/HIGH issues." cost_distance,2026-04-13T12:00:00Z,1191,,,CuPy Bellman-Ford max_iterations = h+w instead of h*w. Fix in PR #1192. curvature,2026-03-30T15:00:00Z,,,,Formula matches ArcGIS reference. Backends consistent. No issues found. dasymetric,2026-04-14T12:00:00Z,,,,Mass conservation correct. Weighted/binary/limiting_variable all verified. Pycnophylactic Tobler algorithm correct. -edge_detection,2026-05-01,,,,Thin wrappers around convolve_2d with fixed Sobel/Prewitt/Laplacian kernels; no issues found diffusion,2026-05-01,,LOW,1;2;5,"LOW: no Kahan summation across long iterations (drift over 100k steps, standard for explicit Euler); lap=n+s+w+e-4*val has catastrophic cancellation for nearly-uniform large values; res=0 in attrs causes div-by-zero (no guard); dask+cupy boundary='nan' relies on dask accepting cp.nan as fill. CPU/GPU NaN handling consistent (np.isnan vs val!=val). depth=1 matches stencil radius. Memory guards, CFL check, step cap all in place. No CRIT/HIGH." +edge_detection,2026-05-01,,,,Thin wrappers around convolve_2d with fixed Sobel/Prewitt/Laplacian kernels; no issues found emerging_hotspots,2026-04-30,,MEDIUM,2;3,MEDIUM: threshold_90 uses int() (truncation) instead of ceil() so n_times=11 requires only 9/11 (81.8%) instead of 90%. MEDIUM: NaN time steps produce gi_bin=0 which classifier counts as 'non-significant' rather than missing; threshold_90 uses full n_times not valid count. LOW: 'global_std == 0' check does not catch NaN std for fully/mostly NaN inputs. fire,2026-04-30,,,,All ops per-pixel (no accumulation/stencil/projected distance). NaN handled via x!=x; CUDA bounds use strict <; rdnbr and ros divisions guarded; CPU/GPU/dask paths algorithmically identical. No accuracy issues found. flood,2026-04-30,,MEDIUM,2;5,"MEDIUM (not fixed): dask backend preserves float32 input dtype while numpy promotes to float64 in flood_depth and curve_number_runoff; DataArray inputs for curve_number, mannings_n bypass scalar > 0 (and CN <= 100) range validation, silently producing NaN/garbage." focal,2026-03-30T13:00:00Z,1092,,, -geotiff,2026-05-06,1500,HIGH,3,"Pass 3 (2026-05-06): HIGH fixed PR #1500 - predictor=3 un-transpose always wrote MSB at byte index bps-1 so big-endian float TIFFs decoded to garbage; now byte-order aware; GPU output also gets a final byteswap for BE files. (Pass 3 also opened #1499 for predictor=2 sample-wise decode but it was a duplicate of already-merged #1498 -- closed.) | Pass 2 (2026-05-05) PR #1498: predictor=2 was byte-wise at sample stride, multi-byte integer files written by libtiff/GDAL silently decoded to wrong values; now sample-wise per TIFF spec on CPU and GPU. | Pass 1 (2026-04-23) PR #1247: HIGH fixed CPU fp_predictor_decode wrong byte-lane layout for multi-sample predictor=3 (GPU was correct). MEDIUMs also fixed on same PR: BigTIFF writers emit LONG8 strip/tile offsets (were LONG); VRT read honors AREA_OR_POINT=Point; VRT nodata cast uses source dtype instead of float32. LOW fixed: duplicate LERC codec block removed from _compression.py." -geotiff,2026-05-06,1498,HIGH,1;5,"HIGH fixed: CPU fp_predictor_decode wrong byte-lane layout for multi-sample predictor=3 (GPU was correct). MEDIUMs also fixed on same PR: eager and streaming writers emit LONG8 strip/tile offsets in BigTIFF output (were LONG); VRT read honors AREA_OR_POINT=Point; VRT nodata cast uses source dtype instead of float32. LOW fixed: duplicate LERC codec block removed from _compression.py. 2026-05-05 deep pass: HIGH fixed in #1498 -- predictor=2 (horizontal differencing) did byte-wise differencing instead of sample-wise; uint8 was correct but uint16/int16/uint32/int32 silently corrupted pixel values both on read of GDAL/libtiff/tifffile/rasterio-written files and on write (their readers saw garbage). CPU + GPU + writer all reworked to dispatch per-dtype kernels at sample resolution with stride=samples. LOW deferred: ModelTiepointTag with multiple tiepoints uses only the first 6 doubles (no warning); rare in practice but silently produces wrong georeferencing for control-point-georeferenced files. 2026-05-06 fourth pass: HIGH fixed in fix-sparse-cog-tile branch -- sparse COG tiles/strips (TileByteCounts/StripByteCounts == 0 from GDAL SPARSE_OK=TRUE) crashed local mmap reads (""incomplete or truncated stream""), returned uninitialised np.empty memory in the COG-HTTP path, and broke GPU reads. Fix: detect sparse blocks, pre-fill the result with the file's nodata sentinel (0 if unset), skip those entries in the decode loop. New tests cover tiled+stripped, with/without nodata, raw and accessor reads, plus GPU. Re-confirmed clean: PackBits decode against GDAL output, planar=2 + predictor=2 + uint16/uint32, non-tile-aligned dask streaming write, WhiteIsZero (PI=0) inversion. Deferred non-accuracy: tiled JPEG TIFFs from GDAL fail because reader does not inject JPEGTables (tag 347) into per-tile streams (interop crash, not numerical) and Orientation tag (274) is silently ignored. No CRIT findings." +geotiff,2026-05-09,"1537,1540",HIGH,4;5,"Pass 9 (2026-05-09): TWO HIGH fixed -- (a) PR #1539 closes #1537: TIFF Orientation tag 2/3/4 (mirror flips) on georeferenced files left y/x coords computed from the un-flipped transform, so xarray label lookups returned the wrong pixel even though _apply_orientation flipped the buffer. PR #1521 only updated the transform for the 5-8 axis-swap branch. Fix updates origin and pixel-scale signs along whichever axes were flipped, for both PixelIsArea (origin shifts by N*step) and PixelIsPoint (shifts by (N-1)*step). 10 new tests in test_orientation.py. (b) PR #1546 closes #1540: read_geotiff_gpu ignored Orientation tag completely; CPU correctly applied 2-8 (PR #1521) but GPU returned the raw stored buffer. Cross-backend disagreement on every non-default orientation. Fix adds _apply_orientation_gpu (cupy slicing mirror of the CPU helper) and _apply_orientation_geo_info, threads them into the tiled GPU pipeline, reuses CPU-fallback geo_info for the stripped path to avoid double-applying. 28 new tests in test_orientation_gpu.py (every orientation, single-band tiled, single-band stripped, 3-band tiled, mirror-flip sel-fidelity, default no-tag passthrough). Re-confirmed clean: HTTP coalesce_ranges with overlapping ranges and zero-length ranges, parallel streaming write thread-safety (each tile gets independent buffer via copy or padded zeros), planar=2 + chunky GPU LERC mask propagation matches CPU, IFD chain cap MAX_IFDS=256, max_z_error round-trip on tiled write, _resolve_masked_fill float vs integer dtype semantics. Deferred LOW: per-sample LERC mask (3D mask (h,w,samples)) collapsed to per-pixel ""any sample invalid"" on GPU while CPU honours per-sample; LERC implementations rarely emit 3D masks (verified: lerc.encode with 2D mask on 3-band returns 2D mask). Documented planar=2 + LERC + GPU silently drops mask (rare in practice, source comment acknowledges). | Pass 8 (2026-05-07): HIGH fixed in fix-jpeg-tiff-disable -- to_geotiff(compression='jpeg') wrote files that no external reader can decode. The writer tags compression=7 (new-style JPEG) but emits a self-contained JFIF stream per tile/strip and never writes the JPEGTables tag (347) that the TIFF spec requires for that codec. libtiff/GDAL/rasterio all reject the file with TIFFReadEncodedStrip() failed; our reader round-trips because Pillow decodes the standalone JFIF, hiding the break. Pass-4 notes flagged the read side of the same JPEGTables gap and deferred it; pass-8 covers the write side. Fix: reject compression='jpeg' at the to_geotiff entry with a clear ValueError pointing at deflate/zstd/lzw. The internal _writer.write is untouched so the existing self-decoding tests still cover the codec; re-enabling the public path needs a JPEGTables-aware encoder. PR diffs reviewed but not merged: #1512 (BytesIO source) and #1513 (LERC max_z_error) -- both look correct; #1512 file-like read path goes through read_all() once so the per-call BytesIOSource lock is theoretical, and #1513 forwards max_z_error through every overview/tile/strip/streaming path including _write_vrt_tiled and _compress_block. No regressions found in either open PR. Other surfaces audited clean: predictor=3 with float16 (writer auto-promotes to float32 on both eager and streaming paths, value-exact round-trip); planar=2 multi-tile read uses band_idx*tiles_per_band offset so no cross-contamination between planes; _header.py multi-byte tag parsing uses bo (byte_order) consistently; Pillow YCbCr-vs-tagged-RGB photometric mismatch becomes moot once JPEG is disabled. Deferred (LOW/MEDIUM, not filed): JPEG2000 writer accepts arbitrary dtype with no validation (rare codec, narrow risk); float16 dtype not in tiff_dtype_to_numpy decode map (writer never emits it - asymmetric but unreachable); Orientation tag (274) still ignored on read (pass-4 deferral). | Pass 7 (2026-05-07): HIGH fixed in fix-mmap-cache-refcount-after-replace -- _MmapCache.release() looked up the cache entry by realpath, so a holder that acquired the OLD mmap before an os.replace and released it AFTER another caller had acquired the post-replace entry would decrement the new holder's refcount. Subsequent eviction (cache full, or another acquire) closed the still-in-use mmap, breaking reads with 'mmap closed or invalid'. Real exposure: any concurrent reader/writer pattern where to_geotiff replaces a file that another reader had just opened via open_geotiff with chunks= or via _FileSource. PR #1506 added stale-replacement detection but did not fix the refcount confusion across the pop. Fix: acquire returns an opaque entry token; release takes the token and decrements that exact entry, regardless of cache state. Orphaned (popped) entries close their fh+mmap when their own refcount hits zero. _FileSource updated to pass the token. Regression test test_release_after_path_replacement_does_not_clobber_new_holder added. All 665 geotiff tests pass; GPU path verified. | Pass 6 (2026-05-07) PR #1507: BE pred2 numba TypingError. | Pass 5 (2026-05-06) PR #1506: mmap cache stale after file replace. | Pass 4 (2026-05-06) PR #1501: sparse COG tiles. | Pass 3 (2026-05-06) PR #1500: predictor=3 byte order. | Pass 2 (2026-05-05) PR #1498: predictor=2 sample-wise. | Pass 1 (2026-04-23) PR #1247. Re-confirmed clean over passes 2-7: items 2 (writer always emits LE TIFFs - hardcoded b'II'), 3 (RowsPerStrip default = height when missing), 4 (StripByteCounts missing raises clear ValueError), 5 (TileWidth without TileLength caught by 'tw <= 0 or th <= 0' check at _reader.py:688), 9 (read determinism on compressed+tiled+multiband), 11 (predictor=2 with awkward sample stride round-trips), 18 (compression_level=99 raises ValueError 'out of range for deflate (valid: 1-9)'), 21 (concurrent writes serialize correctly via mkstemp+os.replace), 24 (uint16 dtype preserved on numpy backend, dask honors chunks param), 26 (chunks rounds correctly with remainder chunk for non-tile-aligned). Deferred: item 8 (BytesIO/file-like sources are not supported, source.lower() error) - documented as 'str' parameter, not a bug; item 19 (LERC max_z_error not user-exposed by to_geotiff) - missing feature, not a bug." glcm,2026-05-01,1408,HIGH,2,"angle=None averaged NaN as 0, masking no-valid-pairs as zero texture; fixed via nanmean-style averaging" hillshade,2026-04-10T12:00:00Z,,,,"Horn's method correct. All backends consistent. NaN propagation correct. float32 adequate for [0,1] output." hydro,2026-04-30,,LOW,1,Only LOW: twi log(0)=-inf if fa=0 (out-of-contract); MFD weighted sum no Kahan (negligible). No CRIT/HIGH issues. @@ -35,6 +34,3 @@ terrain_metrics,2026-04-30,,LOW,2;5,"LOW: Inf input not rejected, propagates as visibility,2026-04-13T12:00:00Z,,,,"Bresenham line, LOS kernel, Fresnel zone all correct. All backends converge to numpy." worley,2026-05-01,,MEDIUM,2;5,"MEDIUM: numpy backend uses np.empty_like(data) so integer input dtype produces integer output (distances truncated to 0); cupy/dask paths always produce float32. LOW: freq=inf produces 100000 sentinel (sqrt of initial min_dist=1e10), no validation of freq/seed for non-finite values." zonal,2026-03-30T12:00:00Z,1090,,, -geotiff,2026-05-06,pending-pr-cache-stale,MEDIUM,5,"Pass 5 (2026-05-06): MEDIUM fixed in fix-mmap-cache-stale-inode -- _MmapCache returned stale bytes after the file at the same path was replaced (e.g. by to_geotiff which writes via tempfile + os.replace, swapping the inode). Reproduces on numpy and GPU read paths: write A, read A, write B at same path, read returns A. Fixed by storing (st_ino, st_size, st_mtime_ns) per cache entry and dropping the entry on acquire when the file ident has changed; in-use mmaps stay valid for current holders. Tests added for both atomic-rename replacement and in-place truncate-and-rewrite. | Pass 4 (2026-05-06) PR #1501: sparse COG tiles (TileByteCounts==0). | Pass 3 (2026-05-06) PR #1500: predictor=3 byte-order. | Pass 2 (2026-05-05) PR #1498: predictor=2 sample-wise. | Pass 1 (2026-04-23) PR #1247. Re-confirmed clean: south-up flipping is a documented limitation in test_georef_edges.test_y_coords_known_limitation; CRS round-trip 4326/3857/32633/5070 exact; LERC int default lossless; NaN bit patterns survive predictor=3 round-trip; RATIONAL den=0 returns 0.0; ASCII tag NUL termination spec-correct on write; adler32 in GPU deflate path is per-tile not per-chunk so chunk-combine math does not apply; GeoDoubleParams uses correct double-array offset semantics. Deferred: per-band different NODATA via GDAL_METADATA exposed in attrs but not applied (single dataset-level value used for masking)." -geotiff,2026-05-07,1507,MEDIUM,1;5,"Pass 6 (2026-05-07): MEDIUM fixed in PR #1507 - predictor=2 decode crashed with numba TypingError ('Unsupported array dtype: >u2/u4/u8') on big-endian TIFFs with multi-byte integer samples. Regression introduced by the pass-2 rework (#1498) that switched predictor=2 to a sample-wise numpy view in the file's byte order; numba nopython mode rejects non-native dtype arrays. Fix byte-swaps the buffer in place around the kernel call so the kernel sees native order and the on-disk view stays in file order. Tests cover uint16/int16/uint32/int32 BE round-trip plus LE sanity. uint8 BE+pred2 unaffected (single-byte path). Also noted but NOT fixed in this PR (one fix per PR rule): GPU read of BE multi-byte TIFFs crashes with AttributeError on out.byteswap() at _gpu_decode.py:1817 and 1556 because cupy.ndarray has no byteswap method; user-visible effect is a silent fallback to CPU which works for non-pred2 BE files. Re-confirmed clean: empty-array round-trip raises clearly (0x0, 0xN, 1x1 boundary), inline tag values <4 bytes positioned correctly, BigTIFF count overflow, IFD chain loop, RATIONAL den=0, NextIFDOffset > file size, sparse tiles, mmap cache invalidation post-#1506, BE without predictor reads correctly, 12-bit unpack, redirect handling, fp_predictor BE swizzle (#1500), nodata-out-of-dtype handled by string tag, HTTP short-read caught by size check at _reader.py:440, concurrent fetch propagates first error via ThreadPoolExecutor exit. | Pass 5 (2026-05-06) PR #1506: mmap cache stale after file replace. | Pass 4 (2026-05-06) PR #1501: sparse COG tiles. | Pass 3 (2026-05-06) PR #1500: predictor=3 byte order. | Pass 2 (2026-05-05) PR #1498: predictor=2 sample-wise. | Pass 1 (2026-04-23) PR #1247." -geotiff,2026-05-07,pending-pr-jpeg-disable,HIGH,5;3,"Pass 8 (2026-05-07): HIGH fixed in fix-jpeg-tiff-disable -- to_geotiff(compression='jpeg') wrote files that no external reader can decode. The writer tags compression=7 (new-style JPEG) but emits a self-contained JFIF stream per tile/strip and never writes the JPEGTables tag (347) that the TIFF spec requires for that codec. libtiff/GDAL/rasterio all reject the file with TIFFReadEncodedStrip() failed; our reader round-trips because Pillow decodes the standalone JFIF, hiding the break. Pass-4 notes flagged the read side of the same JPEGTables gap and deferred it; pass-8 covers the write side. Fix: reject compression='jpeg' at the to_geotiff entry with a clear ValueError pointing at deflate/zstd/lzw. The internal _writer.write is untouched so the existing self-decoding tests still cover the codec; re-enabling the public path needs a JPEGTables-aware encoder. PR diffs reviewed but not merged: #1512 (BytesIO source) and #1513 (LERC max_z_error) -- both look correct; #1512 file-like read path goes through read_all() once so the per-call BytesIOSource lock is theoretical, and #1513 forwards max_z_error through every overview/tile/strip/streaming path including _write_vrt_tiled and _compress_block. No regressions found in either open PR. Other surfaces audited clean: predictor=3 with float16 (writer auto-promotes to float32 on both eager and streaming paths, value-exact round-trip); planar=2 multi-tile read uses band_idx*tiles_per_band offset so no cross-contamination between planes; _header.py multi-byte tag parsing uses bo (byte_order) consistently; Pillow YCbCr-vs-tagged-RGB photometric mismatch becomes moot once JPEG is disabled. Deferred (LOW/MEDIUM, not filed): JPEG2000 writer accepts arbitrary dtype with no validation (rare codec, narrow risk); float16 dtype not in tiff_dtype_to_numpy decode map (writer never emits it - asymmetric but unreachable); Orientation tag (274) still ignored on read (pass-4 deferral). | Pass 7 (2026-05-07): HIGH fixed in fix-mmap-cache-refcount-after-replace -- _MmapCache.release() looked up the cache entry by realpath, so a holder that acquired the OLD mmap before an os.replace and released it AFTER another caller had acquired the post-replace entry would decrement the new holder's refcount. Subsequent eviction (cache full, or another acquire) closed the still-in-use mmap, breaking reads with 'mmap closed or invalid'. Real exposure: any concurrent reader/writer pattern where to_geotiff replaces a file that another reader had just opened via open_geotiff with chunks= or via _FileSource. PR #1506 added stale-replacement detection but did not fix the refcount confusion across the pop. Fix: acquire returns an opaque entry token; release takes the token and decrements that exact entry, regardless of cache state. Orphaned (popped) entries close their fh+mmap when their own refcount hits zero. _FileSource updated to pass the token. Regression test test_release_after_path_replacement_does_not_clobber_new_holder added. All 665 geotiff tests pass; GPU path verified. | Pass 6 (2026-05-07) PR #1507: BE pred2 numba TypingError. | Pass 5 (2026-05-06) PR #1506: mmap cache stale after file replace. | Pass 4 (2026-05-06) PR #1501: sparse COG tiles. | Pass 3 (2026-05-06) PR #1500: predictor=3 byte order. | Pass 2 (2026-05-05) PR #1498: predictor=2 sample-wise. | Pass 1 (2026-04-23) PR #1247. Re-confirmed clean over passes 2-7: items 2 (writer always emits LE TIFFs - hardcoded b'II'), 3 (RowsPerStrip default = height when missing), 4 (StripByteCounts missing raises clear ValueError), 5 (TileWidth without TileLength caught by 'tw <= 0 or th <= 0' check at _reader.py:688), 9 (read determinism on compressed+tiled+multiband), 11 (predictor=2 with awkward sample stride round-trips), 18 (compression_level=99 raises ValueError 'out of range for deflate (valid: 1-9)'), 21 (concurrent writes serialize correctly via mkstemp+os.replace), 24 (uint16 dtype preserved on numpy backend, dask honors chunks param), 26 (chunks rounds correctly with remainder chunk for non-tile-aligned). Deferred: item 8 (BytesIO/file-like sources are not supported, source.lower() error) - documented as 'str' parameter, not a bug; item 19 (LERC max_z_error not user-exposed by to_geotiff) - missing feature, not a bug." From a1b689fdef69fb2dbe20d930862cd4e7a1c4776c Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 10 May 2026 17:57:31 -0700 Subject: [PATCH 3/3] Gate orientation 2/3/4 transform update on has_georef (PR #1539 review) The previous guard checked `transform is not None`, but `GeoInfo.transform` is a dataclass with a default GeoTransform factory so the check was always true. Plain TIFFs with an Orientation tag and no GeoTIFF tags ended up exposing a fabricated transform via attrs, even though downstream consumers correctly fall back to integer pixel coords for `has_georef=False`. Now `has_georef` gates both the 2/3/4 origin/sign update and the 5-8 axis swap, so the transform attr stays at the dataclass default for non-georef files. Adds `test_orient_2_3_4_no_geo_does_not_modify_transform` to pin the attrs-stay-default behaviour. --- xrspatial/geotiff/_reader.py | 9 +++++- xrspatial/geotiff/tests/test_orientation.py | 33 +++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index af2c7a27..cff5cfe3 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -1574,7 +1574,14 @@ def read_to_array(source, *, window=None, overview_level: int | None = None, # rare in practice (TIFF Orientation 5-8 with a meaningful # ModelTransformation); warn so the user knows to verify. t = geo_info.transform - if t is not None and orientation in (2, 3, 4): + # Only georeferenced files have a meaningful transform to flip. + # Plain TIFFs with an Orientation tag but no GeoTIFF tags get + # their pixel buffer remapped above; their default transform + # is left untouched and the downstream consumer falls back to + # integer pixel coords. + if not geo_info.has_georef: + pass + elif orientation in (2, 3, 4): # PixelIsPoint tiepoints are at pixel centers, so the # opposite-edge pixel sits ``(N-1) * step`` away. PixelIsArea # tiepoints are at pixel edges, so the opposite edge is diff --git a/xrspatial/geotiff/tests/test_orientation.py b/xrspatial/geotiff/tests/test_orientation.py index de2601f8..32c8773d 100644 --- a/xrspatial/geotiff/tests/test_orientation.py +++ b/xrspatial/geotiff/tests/test_orientation.py @@ -394,6 +394,39 @@ def test_orient_2_3_4_no_geo_still_uses_pixel_coords(tmp_path): ) +def test_orient_2_3_4_no_geo_does_not_modify_transform(tmp_path): + """Non-georef file: transform attr stays at the default after orient 2/3/4. + + Earlier draft fabricated origin/sign for the default GeoTransform on + un-georeferenced files because the gating only checked + ``transform is not None`` and the dataclass default is non-None. + Downstream consumers rightly fall back to integer pixel coords for + these files, but exposing a fake transform via attrs misleads any + direct attrs reader. Gate must be ``has_georef``. + """ + arr = np.arange(24, dtype=np.uint8).reshape(4, 6) + # Default GeoTransform on a fresh file the writer leaves untouched. + baseline_path = tmp_path / "orient_no_geo_baseline.tif" + tifffile.imwrite( + str(baseline_path), arr, + extratags=[(274, 'H', 1, 1, True)], + ) + baseline_transform = open_geotiff(str(baseline_path)).attrs.get('transform') + + for orientation in (2, 3, 4): + path = tmp_path / f"orient_no_geo_xform_{orientation}.tif" + tifffile.imwrite( + str(path), arr, + extratags=[(274, 'H', 1, orientation, True)], + ) + da = open_geotiff(str(path)) + assert da.attrs.get('transform') == baseline_transform, ( + f"orient={orientation} on a non-georef file modified " + f"attrs['transform']: got {da.attrs.get('transform')}, " + f"expected baseline {baseline_transform}" + ) + + def test_orientation_with_band_selection_returns_2d(tmp_path): """band= followed by an orientation transpose returns a 2D array.