From d1d8299572e6758a784e04a78b229147274ea0b5 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 11 May 2026 18:00:30 -0700 Subject: [PATCH] Cover compression_level= for compression='lz4' The compression-level validator in xrspatial.geotiff.__init__ advertises a (0, 16) valid range for lz4 alongside deflate (1, 9) and zstd (1, 22), but only the deflate and zstd ranges had round-trip + boundary-reject tests. The lz4 path goes through the same validator at three call sites (eager numpy, dask streaming, and _write_vrt_tiled), so a regression that dropped 'lz4' from _LEVEL_RANGES would have silently accepted any int level -- lz4_compress itself does not validate. Adds 18 tests: round-trip at 0/1/9/16 (lossless), default no-arg path, higher-level not larger on compressible input, eager out-of-range reject at -1/-10/17/100, valid-range message format pin, dask streaming round-trip at 0/1/8/16, and dask streaming out-of-range reject at -1/17/50. Closes the Cat 4 MEDIUM parameter-coverage gap left after pass 7. --- .claude/sweep-test-coverage-state.csv | 2 +- .../test_lz4_compression_level_2026_05_11.py | 183 ++++++++++++++++++ 2 files changed, 184 insertions(+), 1 deletion(-) create mode 100644 xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py diff --git a/.claude/sweep-test-coverage-state.csv b/.claude/sweep-test-coverage-state.csv index 564e7ec9..fb081fa5 100644 --- a/.claude/sweep-test-coverage-state.csv +++ b/.claude/sweep-test-coverage-state.csv @@ -1,3 +1,3 @@ module,last_inspected,issue,severity_max,categories_found,notes -geotiff,2026-05-11,,HIGH,2;3;4,"Pass 7 (2026-05-11): added test_gpu_writer_compression_modes_2026_05_11.py closing Cat 4 HIGH gap on write_geotiff_gpu compression= modes. The writer documents zstd (default, fastest GPU), deflate, jpeg, and none, but only deflate + none had round-trip tests; the default zstd and the jpeg (nvJPEG/Pillow) paths shipped without targeted coverage. 
11 new tests, all passing on GPU host: zstd round-trip + default-codec pinning, jpeg round-trip on 3-band RGB uint8 + 1-band greyscale, TIFF compression-tag header check across none/deflate/zstd/jpeg, plain deflate + none round-trips outside the COG/sentinel paths, and a cross-codec lossless parity check (zstd/deflate/none agree pixel-exact). nvJPEG path was exercised live, not just the Pillow fallback. Pass 6 (2026-05-11): added test_overview_resampling_min_max_median_2026_05_11.py covering Cat 4 HIGH parameter-coverage gap on overview_resampling=min/max/median. CPU end-to-end paths were already covered by test_cog_overview_nodata_1613::test_cpu_cog_overview_aggregations_ignore_sentinel; the GPU end-to-end paths and the direct CPU+GPU block-reducer branches had no targeted tests, so a regression on those code paths would ship undetected. 26 tests, all passing on GPU host: block-reducer unit tests (finite + partial-NaN), end-to-end COG writes for both to_geotiff and write_geotiff_gpu, CPU/GPU parity for to_geotiff(gpu=True), CPU nodata-sentinel regression check, and ValueError error-path tests for unknown method names on both backends. Pass 5 (2026-05-11): added test_degenerate_shapes_backends_2026_05_11.py covering Cat 3 HIGH geometric gaps (1x1 / 1xN / Nx1 reads on dask+numpy, GPU, dask+cupy backends; 1x1 / 1xN / Nx1 writes through write_geotiff_gpu) and Cat 2 MEDIUM NaN/Inf gaps (all-NaN read on GPU + dask+cupy, Inf / -Inf reads on all non-eager backends, NaN sentinel mask on dask read path including sentinel block split across chunk boundary). 23 tests, all passing on GPU host. Prior passes still hold: pass 4 (r4) closed read_geotiff_gpu/dask name= + max_pixels= kwargs (Cat 4), pass 3 (r3) closed read_vrt GPU/dask+GPU backend dispatch (Cat 1) and dtype/name kwargs (Cat 4)." +geotiff,2026-05-11,,HIGH,2;3;4,"Pass 8 (2026-05-11): added test_lz4_compression_level_2026_05_11.py closing Cat 4 MEDIUM parameter-coverage gap on compression='lz4' + compression_level=. 
_LEVEL_RANGES advertises lz4: (0, 16) but only deflate (1, 9) and zstd (1, 22) had direct level boundary + round-trip + reject tests. The range check is the gatekeeper -- lz4_compress silently accepts any int level -- so a regression dropping 'lz4' from _LEVEL_RANGES would ship undetected. 18 tests, all passing: round-trip at levels 0/1/9/16 (lossless), default-level no-arg path, higher-level-not-larger smoke check on compressible input, out-of-range reject at -1/-10/17/100 on eager path, valid-range message format pin (lz4 valid: 0-16), dask streaming round-trip at 0/1/8/16, dask streaming out-of-range reject at -1/17/50 (separate _LEVEL_RANGES call site). Pass 7 (2026-05-11): added test_gpu_writer_compression_modes_2026_05_11.py closing Cat 4 HIGH gap on write_geotiff_gpu compression= modes. The writer documents zstd (default, fastest GPU), deflate, jpeg, and none, but only deflate + none had round-trip tests; the default zstd and the jpeg (nvJPEG/Pillow) paths shipped without targeted coverage. 11 new tests, all passing on GPU host: zstd round-trip + default-codec pinning, jpeg round-trip on 3-band RGB uint8 + 1-band greyscale, TIFF compression-tag header check across none/deflate/zstd/jpeg, plain deflate + none round-trips outside the COG/sentinel paths, and a cross-codec lossless parity check (zstd/deflate/none agree pixel-exact). nvJPEG path was exercised live, not just the Pillow fallback. Pass 6 (2026-05-11): added test_overview_resampling_min_max_median_2026_05_11.py covering Cat 4 HIGH parameter-coverage gap on overview_resampling=min/max/median. CPU end-to-end paths were already covered by test_cog_overview_nodata_1613::test_cpu_cog_overview_aggregations_ignore_sentinel; the GPU end-to-end paths and the direct CPU+GPU block-reducer branches had no targeted tests, so a regression on those code paths would ship undetected. 
26 tests, all passing on GPU host: block-reducer unit tests (finite + partial-NaN), end-to-end COG writes for both to_geotiff and write_geotiff_gpu, CPU/GPU parity for to_geotiff(gpu=True), CPU nodata-sentinel regression check, and ValueError error-path tests for unknown method names on both backends. Pass 5 (2026-05-11): added test_degenerate_shapes_backends_2026_05_11.py covering Cat 3 HIGH geometric gaps (1x1 / 1xN / Nx1 reads on dask+numpy, GPU, dask+cupy backends; 1x1 / 1xN / Nx1 writes through write_geotiff_gpu) and Cat 2 MEDIUM NaN/Inf gaps (all-NaN read on GPU + dask+cupy, Inf / -Inf reads on all non-eager backends, NaN sentinel mask on dask read path including sentinel block split across chunk boundary). 23 tests, all passing on GPU host. Prior passes still hold: pass 4 (r4) closed read_geotiff_gpu/dask name= + max_pixels= kwargs (Cat 4), pass 3 (r3) closed read_vrt GPU/dask+GPU backend dispatch (Cat 1) and dtype/name kwargs (Cat 4)." reproject,2026-05-10,,HIGH,1;4;5,"Added 39 tests: LiteCRS direct coverage, itrf_transform behaviour/roundtrip/array, itrf_frames, geoid_height numerical correctness + raster happy-path, vertical helpers (ellipsoidal<->orthometric/depth), reproject() lat/lon and latitude/longitude dim propagation. Note: _merge_arrays_cupy is imported but unused (no cupy merge dispatch in merge()); flagged as feature gap not test gap." diff --git a/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py b/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py new file mode 100644 index 00000000..15ac8427 --- /dev/null +++ b/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py @@ -0,0 +1,183 @@ +"""Parameter coverage for ``compression_level=`` with ``compression='lz4'``. 
+ +The level-validation map in ``xrspatial.geotiff.__init__`` advertises a +``(0, 16)`` valid range for ``lz4``, but only the ``deflate`` ``(1, 9)`` +and ``zstd`` ``(1, 22)`` ranges had direct round-trip + boundary-error +tests under ``test_compression_level.py``. ``lz4`` shares the same +range-validation table (the dispatcher's eager numpy path, the dask +streaming path, and ``_write_vrt_tiled`` each check ``_LEVEL_RANGES``), +so a regression that drops ``lz4`` from the table -- or shifts the +range bounds -- would only surface against user code. + +This module pins: + +* Round-trip integrity at levels ``0``, ``1``, ``9`` and ``16`` (both range boundaries). +* Round-trip integrity at the documented default (``compression_level=None``) + via the public ``to_geotiff`` API. The default uses the ``lz4_compress`` + signature default (``level=0``), so the no-arg path must still produce a + decodable file. +* ValueError on out-of-range levels (e.g. ``-1`` and ``17``) across both the + eager (numpy) path and the dask streaming path. +* Tile-row segmentation for dask-streaming inputs: a low-level lz4 file + and a high-level lz4 file built from the same input both decode to + the original values bit-exact (lz4 is lossless across its level range). + +Cat 4 MEDIUM: parameter coverage gap on a numeric parameter with multiple +values where only the default was tested. 
+""" +from __future__ import annotations + +import importlib.util +import os + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff + + +_HAS_LZ4 = importlib.util.find_spec("lz4") is not None +_HAS_DASK = importlib.util.find_spec("dask") is not None + +pytestmark = pytest.mark.skipif(not _HAS_LZ4, reason="lz4 package required") + + +def _make_da(seed: int = 0, shape: tuple = (64, 64)) -> xr.DataArray: + """Return a small float32 DataArray with reproducible content.""" + rng = np.random.default_rng(seed) + arr = rng.standard_normal(shape).astype(np.float32) + return xr.DataArray(arr, dims=["y", "x"]) + + +def _make_compressible(shape: tuple = (128, 128)) -> xr.DataArray: + """Smooth gradient + small noise; high spatial coherence so level + differences actually move the needle on compressed size.""" + rng = np.random.default_rng(42) + y, x = np.mgrid[0: shape[0], 0: shape[1]] + arr = ((y + x).astype(np.float32) + + rng.standard_normal(shape).astype(np.float32) * 0.01) + return xr.DataArray(arr, dims=["y", "x"]) + + +# --------------------------------------------------------------------------- +# Round-trip integrity across the documented level range +# --------------------------------------------------------------------------- + + +class TestLZ4LevelRoundTrip: + """Round-trips at the boundaries of the documented ``lz4`` range.""" + + @pytest.mark.parametrize("level", [0, 1, 9, 16]) + def test_lz4_level_round_trip(self, level, tmp_path): + """Every documented level produces a decodable file with exact + pixel fidelity (lz4 is lossless).""" + da = _make_da(seed=level) + path = str(tmp_path / f"lz4_level_{level}.tif") + to_geotiff(da, path, compression="lz4", + compression_level=level) + result = open_geotiff(path) + # lz4 is lossless: assert_array_equal, not assert_allclose. 
+ np.testing.assert_array_equal(result.values, da.values) + + def test_lz4_default_level_round_trip(self, tmp_path): + """``compression_level=None`` falls through to ``lz4_compress``'s + default (``level=0``). Pin the no-arg path so a future signature + change is caught.""" + da = _make_da(seed=99) + path = str(tmp_path / "lz4_default.tif") + to_geotiff(da, path, compression="lz4") + result = open_geotiff(path) + np.testing.assert_array_equal(result.values, da.values) + + +# --------------------------------------------------------------------------- +# Higher level should not produce a larger file on compressible input +# --------------------------------------------------------------------------- + + +class TestLZ4LevelSizeEffect: + """Higher ``compression_level`` yields the same or fewer bytes for + a compressible input. lz4 supports level 0 (fast) through 16 (HC); + levels above 0 invoke the high-compression mode.""" + + def test_lz4_higher_level_not_larger(self, tmp_path): + da = _make_compressible() + path_lo = str(tmp_path / "lz4_lo.tif") + path_hi = str(tmp_path / "lz4_hi.tif") + to_geotiff(da, path_lo, compression="lz4", compression_level=0) + to_geotiff(da, path_hi, compression="lz4", compression_level=16) + size_lo = os.path.getsize(path_lo) + size_hi = os.path.getsize(path_hi) + # Allow equality: very small or already-compressed payloads can + # land at the same byte count. The contract is "no worse". + assert size_hi <= size_lo, ( + f"Expected level-16 file ({size_hi}) <= level-0 file ({size_lo})") + + +# --------------------------------------------------------------------------- +# Out-of-range level rejection (eager path) +# --------------------------------------------------------------------------- + + +class TestLZ4LevelOutOfRange: + """The ``_LEVEL_RANGES`` table advertises ``lz4: (0, 16)``. 
Pin the + rejection path so a future range change does not silently widen the + accepted band.""" + + @pytest.mark.parametrize("level", [-1, -10, 17, 100]) + def test_lz4_out_of_range_level_raises_eager(self, level, tmp_path): + """Out-of-range level on the numpy/eager path raises with the + same error message format as deflate/zstd.""" + da = _make_da() + path = str(tmp_path / "lz4_bad.tif") + with pytest.raises(ValueError, match="compression_level"): + to_geotiff(da, path, compression="lz4", + compression_level=level) + + def test_lz4_out_of_range_message_includes_range(self, tmp_path): + """Error message advertises the valid (0, 16) range so callers + know the bound. Mirrors ``test_compression_level`` for zstd.""" + da = _make_da() + path = str(tmp_path / "lz4_bad.tif") + with pytest.raises(ValueError, match=r"lz4.*\(valid:\s*0-16\)"): + to_geotiff(da, path, compression="lz4", + compression_level=999) + + +# --------------------------------------------------------------------------- +# Dask streaming path level handling +# --------------------------------------------------------------------------- + + +@pytest.mark.skipif(not _HAS_DASK, reason="dask package required") +class TestLZ4LevelDaskStreaming: + """The dask streaming branch (``hasattr(raw, 'dask') and not cog``) has + its own ``_LEVEL_RANGES`` check at a separate call site. 
Cover both + accept and reject branches there.""" + + def _make_dask_da(self, shape=(64, 64), chunks=(16, 16)): + import dask.array as da_mod + rng = np.random.default_rng(7) + arr = rng.standard_normal(shape).astype(np.float32) + return xr.DataArray( + da_mod.from_array(arr, chunks=chunks), + dims=["y", "x"], + ), arr + + @pytest.mark.parametrize("level", [0, 1, 8, 16]) + def test_lz4_dask_streaming_level_round_trip(self, level, tmp_path): + dask_da, np_arr = self._make_dask_da() + path = str(tmp_path / f"lz4_dask_level_{level}.tif") + to_geotiff(dask_da, path, compression="lz4", + compression_level=level, tile_size=16) + result = open_geotiff(path) + np.testing.assert_array_equal(result.values, np_arr) + + @pytest.mark.parametrize("level", [-1, 17, 50]) + def test_lz4_dask_streaming_out_of_range_raises(self, level, tmp_path): + dask_da, _ = self._make_dask_da() + path = str(tmp_path / "lz4_dask_bad.tif") + with pytest.raises(ValueError, match="compression_level"): + to_geotiff(dask_da, path, compression="lz4", + compression_level=level, tile_size=16)