From 1ec50c78b954f98e21a227b961b9dd62107dcc4d Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 11 May 2026 18:06:50 -0700 Subject: [PATCH 1/2] Block write_geotiff_gpu(file_like, cog=True) for parity with to_geotiff (#1652) to_geotiff has long rejected cog=True + file-like destinations, but the explicit write_geotiff_gpu entry point silently accepted the combo and emitted a COG into the buffer. The two writers should agree on which inputs they refuse: to_geotiff(gpu=True, cog=True, path=BytesIO) raises, so write_geotiff_gpu(da, BytesIO, cog=True) should too. Mirror the existing to_geotiff guard on the GPU entry point. Non-cog file-like writes remain supported on this path (the gate is targeted at cog=True only). Add regression coverage in test_bytesio_source.py. Also clarify the path/compression docstring on write_geotiff_gpu: - path: document that file-like destinations are accepted (cog=True requires a string path). - compression: list the full codec set the function actually accepts and note the deliberate JPEG asymmetry with to_geotiff (#1651 downgraded to docs-only after PR #1647 confirmed advanced-API intent). Update .claude/sweep-api-consistency-state.csv with the 2026-05-11 re-audit row. --- .claude/sweep-api-consistency-state.csv | 1 + xrspatial/geotiff/__init__.py | 21 +++++++-- .../geotiff/tests/test_bytesio_source.py | 47 +++++++++++++++++++ 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/.claude/sweep-api-consistency-state.csv b/.claude/sweep-api-consistency-state.csv index f21c7fe2..0d21cfdd 100644 --- a/.claude/sweep-api-consistency-state.csv +++ b/.claude/sweep-api-consistency-state.csv @@ -1,3 +1,4 @@ module,last_inspected,issue,severity_max,categories_found,notes geotiff,2026-05-11,1644,MEDIUM,3,"Filed write_geotiff_gpu compression docstring drift vs to_geotiff (MEDIUM Cat 3, #1644). Fix on deep-sweep-api-consistency-geotiff-2026-05-11-1778545740: sync the full 9-codec list into the docstring and note GPU vs CPU encode paths; regression test test_compression_docstring_1644.py pins the codec list and exercises each CPU-fallback codec end-to-end. Other potential drifts surveyed: write_vrt returns str while to_geotiff/write_geotiff_gpu return None (LOW, intentional backward-compat); write_vrt nodata typed float|None vs int-accepting siblings (LOW, PEP 484 int->float compat); kwarg-only ordering drift across read functions (LOW, no user impact). Prior issues 1631/1637/1615/1560/1541/1562 all CLOSED." +geotiff,2026-05-11,1652,MEDIUM,5,"Filed MEDIUM file-like cog=True drift #1652 (write_geotiff_gpu accepted BytesIO+cog=True; to_geotiff blocked it). Fixed in PR (TBD): mirror to_geotiff's gate on the explicit GPU writer; add regression tests in test_bytesio_source.py. Also filed #1651 (JPEG acceptance drift) but downgraded to LOW after #1647 confirmed write_geotiff_gpu(jpeg) is deliberate advanced-API; PR (TBD) carries the docstring clarification. Prior 1631/1644 noted in earlier rows (1644 open, fix in PR #1649). LOW: streaming_buffer_bytes default drift to_geotiff=256MB vs write_geotiff_gpu=None (no functional impact, explicit forwarding); to_geotiff data: annotation misses cupy.ndarray (accepted via auto-dispatch). cuda-validated." reproject,2026-05-10,1570,HIGH,2;5,"Filed cross-module attrs['vertical_crs'] type collision (string vs EPSG int) vs xrspatial.geotiff. Fixed in PR (TBD): reproject now writes EPSG int and preserves friendly token under vertical_datum. MEDIUM kwarg-order drift (transform_precision vs chunk_size) and missing type hints vs geotiff documented but not fixed (cosmetic, kwarg-only)." diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 6d84b74d..f24c3bdd 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -2714,7 +2714,7 @@ def _read_once(): def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, - path: str, *, + path, *, crs: int | str | None = None, nodata=None, compression: str = 'zstd', @@ -2747,8 +2747,12 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, 2D or 3D raster. CuPy-backed inputs stay on device; NumPy/Dask inputs are uploaded via ``cupy.asarray(np.asarray(data))`` before compression (matches ``to_geotiff`` parity). - path : str - Output file path. + path : str or binary file-like + Output file path or any object with a ``write`` method + (e.g. ``io.BytesIO``). ``cog=True`` requires a string path: + the auto-dispatch path through ``to_geotiff(gpu=True, cog=True)`` + rejects file-like destinations, and the explicit GPU writer + mirrors that rule (issue #1652). crs : int, str, or None EPSG code or WKT string. nodata : float, int, or None @@ -2837,6 +2841,17 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, "max_z_error is not supported on the GPU writer " "(nvCOMP has no LERC backend). Use to_geotiff(..., gpu=False) " "or omit max_z_error.") + # Mirror to_geotiff's file-like + cog=True rejection. The auto-dispatch + # path through ``to_geotiff(gpu=True, cog=True, path=BytesIO)`` raises + # before reaching here; the explicit GPU writer mirrors the gate so + # callers cannot bypass it (issue #1652). Non-cog file-like writes + # remain supported on this entry point. + _path_is_file_like = ( + not isinstance(path, str)) and hasattr(path, 'write') + if _path_is_file_like and cog: + raise ValueError( + "cog=True is not supported for file-like destinations on the " + "GPU writer. Pass a string path or set cog=False.") # streaming_buffer_bytes is intentionally a no-op on the GPU path; # the kwarg exists for API parity with to_geotiff so callers can pass # the same kwargs to both entry points without filtering. diff --git a/xrspatial/geotiff/tests/test_bytesio_source.py b/xrspatial/geotiff/tests/test_bytesio_source.py index 803aeca2..757c2814 100644 --- a/xrspatial/geotiff/tests/test_bytesio_source.py +++ b/xrspatial/geotiff/tests/test_bytesio_source.py @@ -268,3 +268,50 @@ def seek(self, *a, **k): assert _is_file_like(io.BytesIO(b'x')) is True assert _is_file_like(ReadSeekNoTell()) is False + + +class TestWriteGeotiffGpuBytesIO: + """Regression coverage for ``write_geotiff_gpu`` file-like behaviour. + + ``to_geotiff(gpu=True, ...)`` always rejects BytesIO destinations paired + with ``cog=True`` (the auto-dispatch path's existing guard). The explicit + GPU writer used to silently accept that combo and produce a COG into the + buffer, so the two entry points disagreed on what ``to_geotiff(gpu=True, + cog=True, path=BytesIO)`` does. These tests pin the mirrored gate added + by issue #1652 and confirm the non-cog file-like path still works. + """ + + def test_cog_with_bytesio_rejected_1652(self): + cupy = pytest.importorskip("cupy") + da = xr.DataArray( + cupy.asarray(np.random.rand(64, 64).astype(np.float32)), + dims=['y', 'x'], + coords={'y': np.arange(64.0), 'x': np.arange(64.0)}, + attrs={'crs': 4326}, + ) + from xrspatial.geotiff import write_geotiff_gpu + + buf = io.BytesIO() + with pytest.raises(ValueError, match='cog=True'): + write_geotiff_gpu(da, buf, cog=True) + + def test_non_cog_bytesio_still_works_1652(self): + cupy = pytest.importorskip("cupy") + arr_cpu = np.random.rand(64, 64).astype(np.float32) + da = xr.DataArray( + cupy.asarray(arr_cpu), + dims=['y', 'x'], + coords={'y': np.arange(64.0), 'x': np.arange(64.0)}, + attrs={'crs': 4326}, + ) + from xrspatial.geotiff import write_geotiff_gpu + + buf = io.BytesIO() + # Non-cog file-like write is still supported on the explicit GPU + # writer; only cog=True is gated. + write_geotiff_gpu(da, buf) + assert len(buf.getvalue()) > 0 + + # Verify it round-trips through open_geotiff + rd = open_geotiff(io.BytesIO(buf.getvalue())) + np.testing.assert_allclose(np.asarray(rd.values), arr_cpu) From d91d78c7ef21b6e54a0d8ad7bb3fb8803fd81bd3 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 12 May 2026 05:15:26 -0700 Subject: [PATCH 2/2] Address Copilot review feedback on #1653 - Mirror to_geotiff's cog=True + file-like rejection verbatim (same error string), so callers see identical messages from either entry point. Previously write_geotiff_gpu raised a different message that added "...on the GPU writer..." and dropped the BytesIO hint. - Add a TypeError gate for non-str, non-file-like path arguments, so passing e.g. an int falls through to a clear TypeError instead of an os.path / unicode error deep in the writer. Mirrors to_geotiff's existing TypeError verbatim. - Harden the BytesIO + write_geotiff_gpu tests with the repo's standard _gpu_available() helper (cupy.cuda.is_available() + ImportError guard) instead of pytest.importorskip('cupy'), so CI hosts where CuPy imports but CUDA is unavailable skip cleanly rather than hard-failing in cupy.asarray(). - Add two new regression tests: one pinning byte-for-byte parity of the cog/file-like error message between the two writers, and one pinning the new TypeError on invalid path types. Skip: the low-confidence type annotation suggestion (path: str|os.PathLike|SupportsWrite[bytes]). The other path-accepting functions in this module (to_geotiff, write_vrt, etc.) deliberately leave path untyped; adding a precise union here would diverge from the local convention for marginal benefit. All 17 tests in test_bytesio_source.py pass. --- xrspatial/geotiff/__init__.py | 20 +++-- .../geotiff/tests/test_bytesio_source.py | 74 ++++++++++++++++++- 2 files changed, 85 insertions(+), 9 deletions(-) diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index f24c3bdd..41d89fa2 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -2841,17 +2841,23 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, "max_z_error is not supported on the GPU writer " "(nvCOMP has no LERC backend). Use to_geotiff(..., gpu=False) " "or omit max_z_error.") - # Mirror to_geotiff's file-like + cog=True rejection. The auto-dispatch + # Mirror to_geotiff's path-type + cog=True gating verbatim so callers + # see identical errors from the two entry points. The auto-dispatch # path through ``to_geotiff(gpu=True, cog=True, path=BytesIO)`` raises - # before reaching here; the explicit GPU writer mirrors the gate so - # callers cannot bypass it (issue #1652). Non-cog file-like writes + # before reaching here; the explicit GPU writer mirrors the same gate + # so callers cannot bypass it (issue #1652). Non-cog file-like writes # remain supported on this entry point. _path_is_file_like = ( not isinstance(path, str)) and hasattr(path, 'write') - if _path_is_file_like and cog: - raise ValueError( - "cog=True is not supported for file-like destinations on the " - "GPU writer. Pass a string path or set cog=False.") + if _path_is_file_like: + if cog: + raise ValueError( + "cog=True is not supported for file-like destinations. " + "Pass a string path or write to BytesIO without cog=True.") + elif not isinstance(path, str): + raise TypeError( + f"path must be a str or a binary file-like with a write() " + f"method, got {type(path).__name__}") # streaming_buffer_bytes is intentionally a no-op on the GPU path; # the kwarg exists for API parity with to_geotiff so callers can pass # the same kwargs to both entry points without filtering. diff --git a/xrspatial/geotiff/tests/test_bytesio_source.py b/xrspatial/geotiff/tests/test_bytesio_source.py index 757c2814..c950a920 100644 --- a/xrspatial/geotiff/tests/test_bytesio_source.py +++ b/xrspatial/geotiff/tests/test_bytesio_source.py @@ -6,6 +6,7 @@ """ from __future__ import annotations +import importlib.util import io from concurrent.futures import ThreadPoolExecutor @@ -17,6 +18,26 @@ from xrspatial.geotiff._reader import _BytesIOSource, read_to_array +def _gpu_available() -> bool: + """True when cupy imports AND a CUDA runtime is initialised. + + Mirrors the helper used in other geotiff GPU tests so the BytesIO + GPU-writer tests skip cleanly on hosts where CuPy is installed but + CUDA is unavailable (Copilot review on #1653). + """ + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + def _make_da(height=32, width=40, dtype=np.float32): arr = np.arange(height * width, dtype=dtype).reshape(height, width) # Simple geotransform with negative pixel_height (north-up) @@ -281,8 +302,9 @@ class TestWriteGeotiffGpuBytesIO: by issue #1652 and confirm the non-cog file-like path still works. """ + @_gpu_only def test_cog_with_bytesio_rejected_1652(self): - cupy = pytest.importorskip("cupy") + import cupy da = xr.DataArray( cupy.asarray(np.random.rand(64, 64).astype(np.float32)), dims=['y', 'x'], @@ -295,8 +317,56 @@ def test_cog_with_bytesio_rejected_1652(self): with pytest.raises(ValueError, match='cog=True'): write_geotiff_gpu(da, buf, cog=True) + @_gpu_only + def test_cog_with_bytesio_error_matches_to_geotiff_1652(self): + """The error string must match ``to_geotiff``'s gate verbatim so + downstream callers can rely on a single message (Copilot review + on #1653).""" + import cupy + da = xr.DataArray( + cupy.asarray(np.random.rand(64, 64).astype(np.float32)), + dims=['y', 'x'], + coords={'y': np.arange(64.0), 'x': np.arange(64.0)}, + attrs={'crs': 4326}, + ) + from xrspatial.geotiff import write_geotiff_gpu + + # to_geotiff's canonical message; mirrored verbatim in + # write_geotiff_gpu's gate. + expected = ( + "cog=True is not supported for file-like destinations. " + "Pass a string path or write to BytesIO without cog=True." + ) + + buf = io.BytesIO() + with pytest.raises(ValueError) as exc_info: + write_geotiff_gpu(da, buf, cog=True) + assert str(exc_info.value) == expected + + # And the CPU writer raises the same string for parity. + with pytest.raises(ValueError) as exc_info_cpu: + to_geotiff(_make_da(), io.BytesIO(), cog=True) + assert str(exc_info_cpu.value) == expected + + @_gpu_only + def test_invalid_path_type_raises_typeerror_1652(self): + """Mirror to_geotiff's TypeError for non-str, non-file-like paths + so callers see identical behaviour from both entry points.""" + import cupy + da = xr.DataArray( + cupy.asarray(np.random.rand(64, 64).astype(np.float32)), + dims=['y', 'x'], + coords={'y': np.arange(64.0), 'x': np.arange(64.0)}, + attrs={'crs': 4326}, + ) + from xrspatial.geotiff import write_geotiff_gpu + + with pytest.raises(TypeError, match="path must be a str"): + write_geotiff_gpu(da, 42) # int is neither str nor file-like + + @_gpu_only def test_non_cog_bytesio_still_works_1652(self): - cupy = pytest.importorskip("cupy") + import cupy arr_cpu = np.random.rand(64, 64).astype(np.float32) da = xr.DataArray( cupy.asarray(arr_cpu),