From ed61b1e29e551e14deadc048594e1fa6c3e007c4 Mon Sep 17 00:00:00 2001 From: Tim Huff Date: Sun, 17 May 2026 17:34:19 -0700 Subject: [PATCH 01/14] first commit --- src/groundlight/client.py | 6 ++++- src/groundlight/images.py | 35 ++++++++++++++++++++++++++ test/integration/test_groundlight.py | 20 +++++++++++++++ test/unit/test_imagefuncs.py | 37 ++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 1 deletion(-) diff --git a/src/groundlight/client.py b/src/groundlight/client.py index 2318cab2..08699447 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -43,7 +43,7 @@ from groundlight.binary_labels import Label, convert_internal_label_to_display from groundlight.config import API_TOKEN_MISSING_HELP_MESSAGE, API_TOKEN_VARIABLE_NAME, DISABLE_TLS_VARIABLE_NAME from groundlight.encodings import url_encode_dict -from groundlight.images import ByteStreamWrapper, parse_supported_image_types +from groundlight.images import ByteStreamWrapper, parse_supported_image_types, recompress_shrink_image from groundlight.internalapi import ( GroundlightApiClient, NotFoundError, @@ -799,6 +799,10 @@ def submit_image_query( # noqa: PLR0913 # pylint: disable=too-many-arguments, t detector_id = detector.id if isinstance(detector, Detector) else detector image_bytesio: ByteStreamWrapper = parse_supported_image_types(image) + # Match the cloud's ingest pipeline locally. Saves bandwidth and ensures Edge + # Endpoints, which do not run this step, see the same input distribution + # cloud-trained models were trained on. + image_bytesio = ByteStreamWrapper(data=recompress_shrink_image(image_bytesio.read())) params = { "detector_id": detector_id, diff --git a/src/groundlight/images.py b/src/groundlight/images.py index a8d2714f..6d93d260 100644 --- a/src/groundlight/images.py +++ b/src/groundlight/images.py @@ -7,6 +7,19 @@ DEFAULT_JPEG_QUALITY = 95 +# The Groundlight cloud applies a recompress/shrink step on ingest. Doing the same +# work client-side saves bandwidth and ensures Edge Endpoints, which do not run +# this step, see the same input distribution that cloud-trained models expect. +# +# The constants and algorithm below mirror zuuul's implementation. Source of truth: +# - zuuul/janzu/apparati/imgtools.py::recompress_shrink_image +# - zuuul/janzu/reef_api/utils.py::_save_image (gate) +# - zuuul/janzu/authz/user-settings-defaults.yaml (default values) +# If the cloud's behavior changes, update these together. +MAX_BYTES_IMAGE_SIZE = 256_000 +MAX_IMAGE_RESOLUTION_LONGSIDE = 1024 +RECOMPRESS_SHRINK_IMAGE_JPEG_QUALITY = 85 + class ByteStreamWrapper(IOBase): """This class acts as a thin wrapper around bytes in order to @@ -78,6 +91,28 @@ def bytestream_from_pil(pil_image: Image.Image, jpeg_quality: int = DEFAULT_JPEG return ByteStreamWrapper(data=bytesio) +def recompress_shrink_image(jpeg: bytes) -> bytes: + """Shrink and re-encode an oversized JPEG to match the cloud's ingest pipeline. + + If the input is already at or below MAX_BYTES_IMAGE_SIZE, returns it unchanged. + Otherwise, decodes the image, scales it (BICUBIC, aspect-ratio preserved) so the + longest side is at most MAX_IMAGE_RESOLUTION_LONGSIDE, and re-encodes as JPEG. + + Already-lossy JPEGs are decoded and re-encoded, which is the same lossy step the + cloud has been doing for years; net quality reaching the ML pipeline is unchanged. + """ + if len(jpeg) <= MAX_BYTES_IMAGE_SIZE: + return jpeg + img = Image.open(BytesIO(jpeg)).convert("RGB") + if max(img.size) > MAX_IMAGE_RESOLUTION_LONGSIDE: + ratio = MAX_IMAGE_RESOLUTION_LONGSIDE / max(img.size) + new_size = (int(img.width * ratio), int(img.height * ratio)) + img = img.resize(new_size, resample=Image.Resampling.BICUBIC) + buf = BytesIO() + img.save(buf, "jpeg", quality=RECOMPRESS_SHRINK_IMAGE_JPEG_QUALITY) + return buf.getvalue() + + def parse_supported_image_types( image: Union[str, bytes, Image.Image, BytesIO, BufferedReader, np.ndarray], jpeg_quality: int = 95, diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py index c96f4ae9..a3e0f298 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -5,11 +5,13 @@ import random import string import time +from io import BytesIO from typing import Any, Callable, Dict, Optional, Union import pytest from groundlight import Groundlight from groundlight.binary_labels import VALID_DISPLAY_LABELS, Label, convert_internal_label_to_display +from groundlight.images import MAX_IMAGE_RESOLUTION_LONGSIDE, jpeg_from_numpy from groundlight.internalapi import ApiException, NotFoundError from groundlight.optional_imports import * from groundlight.status_codes import is_user_error @@ -368,6 +370,24 @@ def test_submit_image_query_png(gl: Groundlight, detector: Detector): assert is_valid_display_result(_image_query.result) +@retry_on_failure() +def test_submit_image_query_shrinks_oversized_image(gl: Groundlight, detector: Detector): + """Verifies the SDK shrinks oversized images client-side and the cloud stores the shrunken version. + + Detects drift between the SDK and zuuul: if either side changes its algorithm such that + the cloud-stored dimensions differ from what the SDK produces locally, this test fails. + Does not catch zuuul becoming more permissive (the SDK would still shrink to a smaller + image that zuuul accepts as-is); that direction is benign and intentionally not covered. + """ + np.random.seed(0) + # Random noise compresses poorly, so 3000x4000 is well above the 256 KB threshold. + big = jpeg_from_numpy(np.random.uniform(0, 255, (3000, 4000, 3))) + iq = gl.submit_image_query(detector=detector.id, image=big, human_review="NEVER") + stored = Image.open(BytesIO(gl.get_image(iq.id))) + # 3000x4000 scaled so longest side == 1024 preserves the 3:4 aspect ratio. + assert stored.size == (MAX_IMAGE_RESOLUTION_LONGSIDE, 768) + + @retry_on_failure() def test_submit_image_query_with_confidence_threshold(gl: Groundlight, detector: Detector): confidence_threshold = 0.5234 # Arbitrary specific value diff --git a/test/unit/test_imagefuncs.py b/test/unit/test_imagefuncs.py index 835aac1a..6b13c73e 100644 --- a/test/unit/test_imagefuncs.py +++ b/test/unit/test_imagefuncs.py @@ -90,6 +90,43 @@ def test_pil_support_ref(): assert img2.size == (509, 339) +@pytest.mark.skipif(MISSING_NUMPY or MISSING_PIL, reason="Needs numpy and pillow") # type: ignore +def test_recompress_shrink_image_small_returns_unchanged(): + """Images at or below the byte threshold are passed through untouched.""" + np.random.seed(0) + small = jpeg_from_numpy(np.random.uniform(0, 255, (200, 200, 3))) + assert len(small) <= MAX_BYTES_IMAGE_SIZE + assert recompress_shrink_image(small) is small + + +@pytest.mark.skipif(MISSING_NUMPY or MISSING_PIL, reason="Needs numpy and pillow") # type: ignore +def test_recompress_shrink_image_oversized_dimensions_get_resized(): + """Images above the byte threshold with longest side > 1024 are downscaled.""" + np.random.seed(0) + # Random noise compresses poorly, so 3000x4000 easily exceeds the 256 KB threshold. + big = jpeg_from_numpy(np.random.uniform(0, 255, (3000, 4000, 3))) + assert len(big) > MAX_BYTES_IMAGE_SIZE + out = recompress_shrink_image(big) + out_img = Image.open(BytesIO(out)) + # 3000x4000 scaled so longest side == 1024 preserves the 3:4 aspect ratio. + assert out_img.size == (1024, 768) + + +@pytest.mark.skipif(MISSING_NUMPY or MISSING_PIL, reason="Needs numpy and pillow") # type: ignore +def test_recompress_shrink_image_oversized_bytes_only_gets_reencoded(): + """Images above the byte threshold but with longest side <= 1024 are re-encoded only.""" + np.random.seed(0) + arr = np.random.uniform(0, 255, (768, 1024, 3)) + high_q = jpeg_from_numpy(arr, jpeg_quality=99) + assert len(high_q) > MAX_BYTES_IMAGE_SIZE + out = recompress_shrink_image(high_q) + out_img = Image.open(BytesIO(out)) + assert out_img.size == (1024, 768) + # Bytes changed (proves re-encode happened) and got smaller (Q85 vs Q99). + assert out != high_q + assert len(out) < len(high_q) + + def test_byte_stream_wrapper(): """ Test that we can call `open` and `close` repeatedly many times on a From 4e94f99faf76ac3c00267dd858980bf2dd2d0b45 Mon Sep 17 00:00:00 2001 From: Tim Huff Date: Sun, 17 May 2026 17:48:06 -0700 Subject: [PATCH 02/14] addressing PR feedback --- pyproject.toml | 2 +- src/groundlight/client.py | 10 +++++----- src/groundlight/images.py | 22 +++++++++------------- test/integration/test_groundlight.py | 9 +++++---- test/unit/test_imagefuncs.py | 12 ++++++------ 5 files changed, 26 insertions(+), 29 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 669ed0f3..8ca9c990 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ packages = [ {include = "**/*.py", from = "src"}, ] readme = "README.md" -version = "0.28.0" +version = "0.29.0" [tool.poetry.dependencies] # For certifi, use ">=" instead of "^" since it upgrades its "major version" every year, not really following semver diff --git a/src/groundlight/client.py b/src/groundlight/client.py index 08699447..0da340bd 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -43,7 +43,7 @@ from groundlight.binary_labels import Label, convert_internal_label_to_display from groundlight.config import API_TOKEN_MISSING_HELP_MESSAGE, API_TOKEN_VARIABLE_NAME, DISABLE_TLS_VARIABLE_NAME from groundlight.encodings import url_encode_dict -from groundlight.images import ByteStreamWrapper, parse_supported_image_types, recompress_shrink_image +from groundlight.images import ByteStreamWrapper, parse_supported_image_types, shrink_image_if_needed from groundlight.internalapi import ( GroundlightApiClient, NotFoundError, @@ -799,10 +799,10 @@ def submit_image_query( # noqa: PLR0913 # pylint: disable=too-many-arguments, t detector_id = detector.id if isinstance(detector, Detector) else detector image_bytesio: ByteStreamWrapper = parse_supported_image_types(image) - # Match the cloud's ingest pipeline locally. Saves bandwidth and ensures Edge - # Endpoints, which do not run this step, see the same input distribution - # cloud-trained models were trained on. - image_bytesio = ByteStreamWrapper(data=recompress_shrink_image(image_bytesio.read())) + # Match the Groundlight cloud service's ingest pipeline locally. Saves bandwidth + # and ensures Edge Endpoints, which do not run this step, see the same input + # distribution cloud-trained models were trained on. + image_bytesio = ByteStreamWrapper(data=shrink_image_if_needed(image_bytesio.read())) params = { "detector_id": detector_id, diff --git a/src/groundlight/images.py b/src/groundlight/images.py index 6d93d260..89392e72 100644 --- a/src/groundlight/images.py +++ b/src/groundlight/images.py @@ -7,18 +7,14 @@ DEFAULT_JPEG_QUALITY = 95 -# The Groundlight cloud applies a recompress/shrink step on ingest. Doing the same -# work client-side saves bandwidth and ensures Edge Endpoints, which do not run -# this step, see the same input distribution that cloud-trained models expect. -# -# The constants and algorithm below mirror zuuul's implementation. Source of truth: -# - zuuul/janzu/apparati/imgtools.py::recompress_shrink_image -# - zuuul/janzu/reef_api/utils.py::_save_image (gate) -# - zuuul/janzu/authz/user-settings-defaults.yaml (default values) -# If the cloud's behavior changes, update these together. +# The Groundlight cloud service applies the same shrink-and-re-encode step on +# ingest. Doing the same work client-side saves bandwidth and ensures Edge +# Endpoints, which do not run this step, see the same input distribution that +# cloud-trained models expect. Keep these constants in sync with the cloud +# service if it ever changes its defaults. MAX_BYTES_IMAGE_SIZE = 256_000 MAX_IMAGE_RESOLUTION_LONGSIDE = 1024 -RECOMPRESS_SHRINK_IMAGE_JPEG_QUALITY = 85 +SHRINK_JPEG_QUALITY = 85 class ByteStreamWrapper(IOBase): @@ -91,8 +87,8 @@ def bytestream_from_pil(pil_image: Image.Image, jpeg_quality: int = DEFAULT_JPEG return ByteStreamWrapper(data=bytesio) -def recompress_shrink_image(jpeg: bytes) -> bytes: - """Shrink and re-encode an oversized JPEG to match the cloud's ingest pipeline. +def shrink_image_if_needed(jpeg: bytes) -> bytes: + """Shrink an oversized JPEG to match the Groundlight cloud service's ingest pipeline. If the input is already at or below MAX_BYTES_IMAGE_SIZE, returns it unchanged. Otherwise, decodes the image, scales it (BICUBIC, aspect-ratio preserved) so the @@ -109,7 +105,7 @@ def recompress_shrink_image(jpeg: bytes) -> bytes: new_size = (int(img.width * ratio), int(img.height * ratio)) img = img.resize(new_size, resample=Image.Resampling.BICUBIC) buf = BytesIO() - img.save(buf, "jpeg", quality=RECOMPRESS_SHRINK_IMAGE_JPEG_QUALITY) + img.save(buf, "jpeg", quality=SHRINK_JPEG_QUALITY) return buf.getvalue() diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py index a3e0f298..3a16db11 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -374,10 +374,11 @@ def test_submit_image_query_png(gl: Groundlight, detector: Detector): def test_submit_image_query_shrinks_oversized_image(gl: Groundlight, detector: Detector): """Verifies the SDK shrinks oversized images client-side and the cloud stores the shrunken version. - Detects drift between the SDK and zuuul: if either side changes its algorithm such that - the cloud-stored dimensions differ from what the SDK produces locally, this test fails. - Does not catch zuuul becoming more permissive (the SDK would still shrink to a smaller - image that zuuul accepts as-is); that direction is benign and intentionally not covered. + Detects drift between the SDK and the cloud service: if either side changes its + algorithm such that the cloud-stored dimensions differ from what the SDK produces + locally, this test fails. Does not catch the cloud service becoming more permissive + (the SDK would still shrink to a smaller image that the cloud accepts as-is); that + direction is benign and intentionally not covered. """ np.random.seed(0) # Random noise compresses poorly, so 3000x4000 is well above the 256 KB threshold. diff --git a/test/unit/test_imagefuncs.py b/test/unit/test_imagefuncs.py index 6b13c73e..f2c7f231 100644 --- a/test/unit/test_imagefuncs.py +++ b/test/unit/test_imagefuncs.py @@ -91,35 +91,35 @@ def test_pil_support_ref(): @pytest.mark.skipif(MISSING_NUMPY or MISSING_PIL, reason="Needs numpy and pillow") # type: ignore -def test_recompress_shrink_image_small_returns_unchanged(): +def test_shrink_image_if_needed_small_returns_unchanged(): """Images at or below the byte threshold are passed through untouched.""" np.random.seed(0) small = jpeg_from_numpy(np.random.uniform(0, 255, (200, 200, 3))) assert len(small) <= MAX_BYTES_IMAGE_SIZE - assert recompress_shrink_image(small) is small + assert shrink_image_if_needed(small) is small @pytest.mark.skipif(MISSING_NUMPY or MISSING_PIL, reason="Needs numpy and pillow") # type: ignore -def test_recompress_shrink_image_oversized_dimensions_get_resized(): +def test_shrink_image_if_needed_oversized_dimensions_get_resized(): """Images above the byte threshold with longest side > 1024 are downscaled.""" np.random.seed(0) # Random noise compresses poorly, so 3000x4000 easily exceeds the 256 KB threshold. big = jpeg_from_numpy(np.random.uniform(0, 255, (3000, 4000, 3))) assert len(big) > MAX_BYTES_IMAGE_SIZE - out = recompress_shrink_image(big) + out = shrink_image_if_needed(big) out_img = Image.open(BytesIO(out)) # 3000x4000 scaled so longest side == 1024 preserves the 3:4 aspect ratio. assert out_img.size == (1024, 768) @pytest.mark.skipif(MISSING_NUMPY or MISSING_PIL, reason="Needs numpy and pillow") # type: ignore -def test_recompress_shrink_image_oversized_bytes_only_gets_reencoded(): +def test_shrink_image_if_needed_oversized_bytes_only_gets_reencoded(): """Images above the byte threshold but with longest side <= 1024 are re-encoded only.""" np.random.seed(0) arr = np.random.uniform(0, 255, (768, 1024, 3)) high_q = jpeg_from_numpy(arr, jpeg_quality=99) assert len(high_q) > MAX_BYTES_IMAGE_SIZE - out = recompress_shrink_image(high_q) + out = shrink_image_if_needed(high_q) out_img = Image.open(BytesIO(out)) assert out_img.size == (1024, 768) # Bytes changed (proves re-encode happened) and got smaller (Q85 vs Q99). From 24764e37fc10f85c5d651a64509d6f8972419830 Mon Sep 17 00:00:00 2001 From: Tim Huff Date: Sun, 17 May 2026 18:19:52 -0700 Subject: [PATCH 03/14] adding an end-to-end test --- test/unit/test_image_submission.py | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 test/unit/test_image_submission.py diff --git a/test/unit/test_image_submission.py b/test/unit/test_image_submission.py new file mode 100644 index 00000000..049c6ecf --- /dev/null +++ b/test/unit/test_image_submission.py @@ -0,0 +1,31 @@ +"""Tests for image handling behavior in Groundlight.submit_image_query.""" + +from io import BytesIO +from unittest import mock + +import pytest +from groundlight import Groundlight +from groundlight.images import MAX_BYTES_IMAGE_SIZE, MAX_IMAGE_RESOLUTION_LONGSIDE, jpeg_from_numpy +from groundlight.internalapi import InternalApiError +from groundlight.optional_imports import MISSING_NUMPY, MISSING_PIL, Image, np + + +@pytest.mark.skipif(MISSING_NUMPY or MISSING_PIL, reason="Needs numpy and pillow") # type: ignore +def test_submit_image_query_sends_shrunken_image(gl: Groundlight): + """Verifies that image shrinking runs in the submission path by inspecting the bytes at the HTTP layer. + + Submits an oversized image to a mocked urllib3 transport, then checks that the body + that actually went on the wire was already resized to the expected dimensions. + """ + np.random.seed(0) + big = jpeg_from_numpy(np.random.uniform(0, 255, (3000, 4000, 3))) + assert len(big) > MAX_BYTES_IMAGE_SIZE + + with mock.patch("urllib3.PoolManager.request") as mock_request: + mock_request.return_value.status = 500 + with pytest.raises(InternalApiError): + gl.submit_image_query(detector="det_test", image=big, wait=0) + + body = mock_request.call_args_list[0].kwargs["body"] + sent_img = Image.open(BytesIO(body)) + assert max(sent_img.size) == MAX_IMAGE_RESOLUTION_LONGSIDE From b34711ae32f15a185bf7c84ca161d697a50802ce Mon Sep 17 00:00:00 2001 From: Tim Huff Date: Sun, 17 May 2026 18:27:41 -0700 Subject: [PATCH 04/14] adding a space --- src/groundlight/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/groundlight/client.py b/src/groundlight/client.py index 0da340bd..32166600 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -799,6 +799,7 @@ def submit_image_query( # noqa: PLR0913 # pylint: disable=too-many-arguments, t detector_id = detector.id if isinstance(detector, Detector) else detector image_bytesio: ByteStreamWrapper = parse_supported_image_types(image) + # Match the Groundlight cloud service's ingest pipeline locally. Saves bandwidth # and ensures Edge Endpoints, which do not run this step, see the same input # distribution cloud-trained models were trained on. From c5593a83403a6ecb2e2283325dfb2d0181b81ac1 Mon Sep 17 00:00:00 2001 From: Auto-format Bot Date: Mon, 18 May 2026 01:28:27 +0000 Subject: [PATCH 05/14] Automatically reformatting code --- src/groundlight/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/groundlight/client.py b/src/groundlight/client.py index 32166600..edcb8771 100644 --- a/src/groundlight/client.py +++ b/src/groundlight/client.py @@ -799,7 +799,7 @@ def submit_image_query( # noqa: PLR0913 # pylint: disable=too-many-arguments, t detector_id = detector.id if isinstance(detector, Detector) else detector image_bytesio: ByteStreamWrapper = parse_supported_image_types(image) - + # Match the Groundlight cloud service's ingest pipeline locally. Saves bandwidth # and ensures Edge Endpoints, which do not run this step, see the same input # distribution cloud-trained models were trained on. From 71af6f5dde6a7c13d2573189289f258a6374ffd4 Mon Sep 17 00:00:00 2001 From: Tim Huff Date: Sun, 17 May 2026 18:30:39 -0700 Subject: [PATCH 06/14] adjusting a comment --- src/groundlight/images.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/groundlight/images.py b/src/groundlight/images.py index 89392e72..d618144c 100644 --- a/src/groundlight/images.py +++ b/src/groundlight/images.py @@ -7,7 +7,7 @@ DEFAULT_JPEG_QUALITY = 95 -# The Groundlight cloud service applies the same shrink-and-re-encode step on +# The Groundlight cloud service applies the same shrink-and-encode step on # ingest. Doing the same work client-side saves bandwidth and ensures Edge # Endpoints, which do not run this step, see the same input distribution that # cloud-trained models expect. Keep these constants in sync with the cloud From 168ed8930a4b2659f78152565f44b7580254cd77 Mon Sep 17 00:00:00 2001 From: Tim Huff Date: Sun, 17 May 2026 18:50:37 -0700 Subject: [PATCH 07/14] refactoring tests to not use numpy --- test/integration/test_groundlight.py | 15 ++++++++++----- test/unit/test_image_submission.py | 17 ++++++++++++----- test/unit/test_imagefuncs.py | 24 +++++++++++++----------- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py index 3a16db11..2a62c569 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -2,6 +2,7 @@ # ruff: noqa: F403,F405 # pylint: disable=wildcard-import,unused-wildcard-import,redefined-outer-name,import-outside-toplevel import json +import os import random import string import time @@ -11,7 +12,7 @@ import pytest from groundlight import Groundlight from groundlight.binary_labels import VALID_DISPLAY_LABELS, Label, convert_internal_label_to_display -from groundlight.images import MAX_IMAGE_RESOLUTION_LONGSIDE, jpeg_from_numpy +from groundlight.images import MAX_IMAGE_RESOLUTION_LONGSIDE from groundlight.internalapi import ApiException, NotFoundError from groundlight.optional_imports import * from groundlight.status_codes import is_user_error @@ -380,12 +381,16 @@ def test_submit_image_query_shrinks_oversized_image(gl: Groundlight, detector: D (the SDK would still shrink to a smaller image that the cloud accepts as-is); that direction is benign and intentionally not covered. """ - np.random.seed(0) - # Random noise compresses poorly, so 3000x4000 is well above the 256 KB threshold. - big = jpeg_from_numpy(np.random.uniform(0, 255, (3000, 4000, 3))) + # Random noise compresses poorly, so 4000x3000 is well above the 256 KB threshold. + raw = os.urandom(4000 * 3000 * 3) + big_pil = Image.frombytes("RGB", (4000, 3000), raw) + buf = BytesIO() + big_pil.save(buf, "jpeg", quality=95) + big = buf.getvalue() + iq = gl.submit_image_query(detector=detector.id, image=big, human_review="NEVER") stored = Image.open(BytesIO(gl.get_image(iq.id))) - # 3000x4000 scaled so longest side == 1024 preserves the 3:4 aspect ratio. + # 4000x3000 scaled so longest side == 1024 preserves the 4:3 aspect ratio. assert stored.size == (MAX_IMAGE_RESOLUTION_LONGSIDE, 768) diff --git a/test/unit/test_image_submission.py b/test/unit/test_image_submission.py index 049c6ecf..7f10c711 100644 --- a/test/unit/test_image_submission.py +++ b/test/unit/test_image_submission.py @@ -1,24 +1,31 @@ """Tests for image handling behavior in Groundlight.submit_image_query.""" +import os from io import BytesIO from unittest import mock import pytest from groundlight import Groundlight -from groundlight.images import MAX_BYTES_IMAGE_SIZE, MAX_IMAGE_RESOLUTION_LONGSIDE, jpeg_from_numpy +from groundlight.images import MAX_BYTES_IMAGE_SIZE, MAX_IMAGE_RESOLUTION_LONGSIDE from groundlight.internalapi import InternalApiError -from groundlight.optional_imports import MISSING_NUMPY, MISSING_PIL, Image, np +from PIL import Image + + +def _make_random_jpeg(width: int, height: int, quality: int = 95) -> bytes: + """Generate a JPEG with random pixel data using PIL only.""" + img = Image.frombytes("RGB", (width, height), os.urandom(width * height * 3)) + buf = BytesIO() + img.save(buf, "jpeg", quality=quality) + return buf.getvalue() -@pytest.mark.skipif(MISSING_NUMPY or MISSING_PIL, reason="Needs numpy and pillow") # type: ignore def test_submit_image_query_sends_shrunken_image(gl: Groundlight): """Verifies that image shrinking runs in the submission path by inspecting the bytes at the HTTP layer. Submits an oversized image to a mocked urllib3 transport, then checks that the body that actually went on the wire was already resized to the expected dimensions. """ - np.random.seed(0) - big = jpeg_from_numpy(np.random.uniform(0, 255, (3000, 4000, 3))) + big = _make_random_jpeg(4000, 3000) assert len(big) > MAX_BYTES_IMAGE_SIZE with mock.patch("urllib3.PoolManager.request") as mock_request: diff --git a/test/unit/test_imagefuncs.py b/test/unit/test_imagefuncs.py index f2c7f231..588e4c4c 100644 --- a/test/unit/test_imagefuncs.py +++ b/test/unit/test_imagefuncs.py @@ -1,6 +1,7 @@ # Optional star-imports are weird and not usually recommended ... # ruff: noqa: F403,F405 # pylint: disable=wildcard-import,unused-wildcard-import,redefined-outer-name,import-outside-toplevel +import os import tempfile from io import BytesIO @@ -8,6 +9,14 @@ from groundlight.images import * from groundlight.optional_imports import * + +def _make_random_jpeg(width: int, height: int, quality: int = 95) -> bytes: + """Generate a JPEG with random pixel data using PIL only.""" + img = Image.frombytes("RGB", (width, height), os.urandom(width * height * 3)) + buf = BytesIO() + img.save(buf, "jpeg", quality=quality) + return buf.getvalue() + JPEG_MIN_SIZE = 500 @@ -90,34 +99,27 @@ def test_pil_support_ref(): assert img2.size == (509, 339) -@pytest.mark.skipif(MISSING_NUMPY or MISSING_PIL, reason="Needs numpy and pillow") # type: ignore def test_shrink_image_if_needed_small_returns_unchanged(): """Images at or below the byte threshold are passed through untouched.""" - np.random.seed(0) - small = jpeg_from_numpy(np.random.uniform(0, 255, (200, 200, 3))) + small = _make_random_jpeg(200, 200) assert len(small) <= MAX_BYTES_IMAGE_SIZE assert shrink_image_if_needed(small) is small -@pytest.mark.skipif(MISSING_NUMPY or MISSING_PIL, reason="Needs numpy and pillow") # type: ignore def test_shrink_image_if_needed_oversized_dimensions_get_resized(): """Images above the byte threshold with longest side > 1024 are downscaled.""" - np.random.seed(0) # Random noise compresses poorly, so 3000x4000 easily exceeds the 256 KB threshold. - big = jpeg_from_numpy(np.random.uniform(0, 255, (3000, 4000, 3))) + big = _make_random_jpeg(4000, 3000) assert len(big) > MAX_BYTES_IMAGE_SIZE out = shrink_image_if_needed(big) out_img = Image.open(BytesIO(out)) - # 3000x4000 scaled so longest side == 1024 preserves the 3:4 aspect ratio. + # 4000x3000 scaled so longest side == 1024 preserves the 4:3 aspect ratio. assert out_img.size == (1024, 768) -@pytest.mark.skipif(MISSING_NUMPY or MISSING_PIL, reason="Needs numpy and pillow") # type: ignore def test_shrink_image_if_needed_oversized_bytes_only_gets_reencoded(): """Images above the byte threshold but with longest side <= 1024 are re-encoded only.""" - np.random.seed(0) - arr = np.random.uniform(0, 255, (768, 1024, 3)) - high_q = jpeg_from_numpy(arr, jpeg_quality=99) + high_q = _make_random_jpeg(1024, 768, quality=99) assert len(high_q) > MAX_BYTES_IMAGE_SIZE out = shrink_image_if_needed(high_q) out_img = Image.open(BytesIO(out)) From 3b8419da97cacc52226f1979007dc86e8d34ef43 Mon Sep 17 00:00:00 2001 From: Auto-format Bot Date: Mon, 18 May 2026 01:51:16 +0000 Subject: [PATCH 08/14] Automatically reformatting code --- test/unit/test_imagefuncs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/unit/test_imagefuncs.py b/test/unit/test_imagefuncs.py index 588e4c4c..24d14404 100644 --- a/test/unit/test_imagefuncs.py +++ b/test/unit/test_imagefuncs.py @@ -17,6 +17,7 @@ def _make_random_jpeg(width: int, height: int, quality: int = 95) -> bytes: img.save(buf, "jpeg", quality=quality) return buf.getvalue() + JPEG_MIN_SIZE = 500 From cee2d0c598bad42649362009644fdbe64a13d265 Mon Sep 17 00:00:00 2001 From: Tim Huff Date: Sun, 17 May 2026 22:08:17 -0700 Subject: [PATCH 09/14] fixing a broken test --- test/integration/test_groundlight.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py index 2a62c569..c87c356c 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -389,7 +389,7 @@ def test_submit_image_query_shrinks_oversized_image(gl: Groundlight, detector: D big = buf.getvalue() iq = gl.submit_image_query(detector=detector.id, image=big, human_review="NEVER") - stored = Image.open(BytesIO(gl.get_image(iq.id))) + stored = Image.open(gl.get_image(iq.id)) # 4000x3000 scaled so longest side == 1024 preserves the 4:3 aspect ratio. assert stored.size == (MAX_IMAGE_RESOLUTION_LONGSIDE, 768) From d9cc52bead6e8b3eb91a353334be16573d9add7b Mon Sep 17 00:00:00 2001 From: Tim Huff Date: Mon, 18 May 2026 08:40:20 -0700 Subject: [PATCH 10/14] refactoring tests --- test/integration/test_groundlight.py | 7 ++----- test/unit/test_image_submission.py | 12 ++---------- test/unit/test_imagefuncs.py | 16 ++++------------ test/utils.py | 14 ++++++++++++++ 4 files changed, 22 insertions(+), 27 deletions(-) create mode 100644 test/utils.py diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py index c87c356c..b1aa7ce0 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -10,6 +10,7 @@ from typing import Any, Callable, Dict, Optional, Union import pytest +from utils import make_random_jpeg from groundlight import Groundlight from groundlight.binary_labels import VALID_DISPLAY_LABELS, Label, convert_internal_label_to_display from groundlight.images import MAX_IMAGE_RESOLUTION_LONGSIDE @@ -382,11 +383,7 @@ def test_submit_image_query_shrinks_oversized_image(gl: Groundlight, detector: D direction is benign and intentionally not covered. """ # Random noise compresses poorly, so 4000x3000 is well above the 256 KB threshold. - raw = os.urandom(4000 * 3000 * 3) - big_pil = Image.frombytes("RGB", (4000, 3000), raw) - buf = BytesIO() - big_pil.save(buf, "jpeg", quality=95) - big = buf.getvalue() + big = make_random_jpeg(4000, 3000) iq = gl.submit_image_query(detector=detector.id, image=big, human_review="NEVER") stored = Image.open(gl.get_image(iq.id)) diff --git a/test/unit/test_image_submission.py b/test/unit/test_image_submission.py index 7f10c711..53844c20 100644 --- a/test/unit/test_image_submission.py +++ b/test/unit/test_image_submission.py @@ -1,31 +1,23 @@ """Tests for image handling behavior in Groundlight.submit_image_query.""" -import os from io import BytesIO from unittest import mock import pytest +from utils import make_random_jpeg from groundlight import Groundlight from groundlight.images import MAX_BYTES_IMAGE_SIZE, MAX_IMAGE_RESOLUTION_LONGSIDE from groundlight.internalapi import InternalApiError from PIL import Image -def _make_random_jpeg(width: int, height: int, quality: int = 95) -> bytes: - """Generate a JPEG with random pixel data using PIL only.""" - img = Image.frombytes("RGB", (width, height), os.urandom(width * height * 3)) - buf = BytesIO() - img.save(buf, "jpeg", quality=quality) - return buf.getvalue() - - def test_submit_image_query_sends_shrunken_image(gl: Groundlight): """Verifies that image shrinking runs in the submission path by inspecting the bytes at the HTTP layer. Submits an oversized image to a mocked urllib3 transport, then checks that the body that actually went on the wire was already resized to the expected dimensions. """ - big = _make_random_jpeg(4000, 3000) + big = make_random_jpeg(4000, 3000) assert len(big) > MAX_BYTES_IMAGE_SIZE with mock.patch("urllib3.PoolManager.request") as mock_request: diff --git a/test/unit/test_imagefuncs.py b/test/unit/test_imagefuncs.py index 24d14404..f8625393 100644 --- a/test/unit/test_imagefuncs.py +++ b/test/unit/test_imagefuncs.py @@ -1,23 +1,15 @@ # Optional star-imports are weird and not usually recommended ... # ruff: noqa: F403,F405 # pylint: disable=wildcard-import,unused-wildcard-import,redefined-outer-name,import-outside-toplevel -import os import tempfile from io import BytesIO import pytest +from utils import make_random_jpeg from groundlight.images import * from groundlight.optional_imports import * -def _make_random_jpeg(width: int, height: int, quality: int = 95) -> bytes: - """Generate a JPEG with random pixel data using PIL only.""" - img = Image.frombytes("RGB", (width, height), os.urandom(width * height * 3)) - buf = BytesIO() - img.save(buf, "jpeg", quality=quality) - return buf.getvalue() - - JPEG_MIN_SIZE = 500 @@ -102,7 +94,7 @@ def test_pil_support_ref(): def test_shrink_image_if_needed_small_returns_unchanged(): """Images at or below the byte threshold are passed through untouched.""" - small = _make_random_jpeg(200, 200) + small = make_random_jpeg(200, 200) assert len(small) <= MAX_BYTES_IMAGE_SIZE assert shrink_image_if_needed(small) is small @@ -110,7 +102,7 @@ def test_shrink_image_if_needed_small_returns_unchanged(): def test_shrink_image_if_needed_oversized_dimensions_get_resized(): """Images above the byte threshold with longest side > 1024 are downscaled.""" # Random noise compresses poorly, so 3000x4000 easily exceeds the 256 KB threshold. - big = _make_random_jpeg(4000, 3000) + big = make_random_jpeg(4000, 3000) assert len(big) > MAX_BYTES_IMAGE_SIZE out = shrink_image_if_needed(big) out_img = Image.open(BytesIO(out)) @@ -120,7 +112,7 @@ def test_shrink_image_if_needed_oversized_dimensions_get_resized(): def test_shrink_image_if_needed_oversized_bytes_only_gets_reencoded(): """Images above the byte threshold but with longest side <= 1024 are re-encoded only.""" - high_q = _make_random_jpeg(1024, 768, quality=99) + high_q = make_random_jpeg(1024, 768, quality=99) assert len(high_q) > MAX_BYTES_IMAGE_SIZE out = shrink_image_if_needed(high_q) out_img = Image.open(BytesIO(out)) diff --git a/test/utils.py b/test/utils.py new file mode 100644 index 00000000..4178052d --- /dev/null +++ b/test/utils.py @@ -0,0 +1,14 @@ +"""Shared utility functions for tests.""" + +import os +from io import BytesIO + +from PIL import Image + + +def make_random_jpeg(width: int, height: int, quality: int = 95) -> bytes: + """Generate a JPEG with random pixel data.""" + img = Image.frombytes("RGB", (width, height), os.urandom(width * height * 3)) + buf = BytesIO() + img.save(buf, "jpeg", quality=quality) + return buf.getvalue() From c715466ecb1e5cc26e123a3b52aa87b7e6c47d99 Mon Sep 17 00:00:00 2001 From: Auto-format Bot Date: Mon, 18 May 2026 15:41:22 +0000 Subject: [PATCH 11/14] Automatically reformatting code --- test/integration/test_groundlight.py | 4 +--- test/unit/test_image_submission.py | 2 +- test/unit/test_imagefuncs.py | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py index b1aa7ce0..63ddc84c 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -2,15 +2,12 @@ # ruff: noqa: F403,F405 # pylint: disable=wildcard-import,unused-wildcard-import,redefined-outer-name,import-outside-toplevel import json -import os import random import string import time -from io import BytesIO from typing import Any, Callable, Dict, Optional, Union import pytest -from utils import make_random_jpeg from groundlight import Groundlight from groundlight.binary_labels import VALID_DISPLAY_LABELS, Label, convert_internal_label_to_display from groundlight.images import MAX_IMAGE_RESOLUTION_LONGSIDE @@ -33,6 +30,7 @@ ) from urllib3.exceptions import ConnectTimeoutError, MaxRetryError, ReadTimeoutError from urllib3.util.retry import Retry +from utils import make_random_jpeg from test.retry_decorator import retry_on_failure diff --git a/test/unit/test_image_submission.py b/test/unit/test_image_submission.py index 53844c20..0003dba1 100644 --- a/test/unit/test_image_submission.py +++ b/test/unit/test_image_submission.py @@ -4,11 +4,11 @@ from unittest import mock import pytest -from utils import make_random_jpeg from groundlight import Groundlight from groundlight.images import MAX_BYTES_IMAGE_SIZE, MAX_IMAGE_RESOLUTION_LONGSIDE from groundlight.internalapi import InternalApiError from PIL import Image +from utils import make_random_jpeg def test_submit_image_query_sends_shrunken_image(gl: Groundlight): diff --git a/test/unit/test_imagefuncs.py b/test/unit/test_imagefuncs.py index f8625393..a1e7f96e 100644 --- a/test/unit/test_imagefuncs.py +++ b/test/unit/test_imagefuncs.py @@ -5,10 +5,9 @@ from io import BytesIO import pytest -from utils import make_random_jpeg from groundlight.images import * from groundlight.optional_imports import * - +from utils import make_random_jpeg JPEG_MIN_SIZE = 500 From cb80fc10da625905ed2eeb4baf87187a481cad94 Mon Sep 17 00:00:00 2001 From: Tim Huff Date: Mon, 18 May 2026 09:00:03 -0700 Subject: [PATCH 12/14] fixing failing test --- pyproject.toml | 1 + test/integration/test_groundlight.py | 2 +- test/unit/test_image_submission.py | 2 +- test/unit/test_imagefuncs.py | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8ca9c990..2623d6a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,6 +76,7 @@ spaces_indent_inline_array = 4 trailing_comma_inline_array = true [tool.pytest.ini_options] +pythonpath = ["test"] markers = [ "skip_for_edge_endpoint", "run_only_for_edge_endpoint", diff --git a/test/integration/test_groundlight.py b/test/integration/test_groundlight.py index b1aa7ce0..15fbcb1e 100644 --- a/test/integration/test_groundlight.py +++ b/test/integration/test_groundlight.py @@ -10,7 +10,6 @@ from typing import Any, Callable, Dict, Optional, Union import pytest -from utils import make_random_jpeg from groundlight import Groundlight from groundlight.binary_labels import VALID_DISPLAY_LABELS, Label, convert_internal_label_to_display from groundlight.images import MAX_IMAGE_RESOLUTION_LONGSIDE @@ -33,6 +32,7 @@ ) from urllib3.exceptions import ConnectTimeoutError, MaxRetryError, ReadTimeoutError from urllib3.util.retry import Retry +from utils import make_random_jpeg from test.retry_decorator import retry_on_failure diff --git a/test/unit/test_image_submission.py b/test/unit/test_image_submission.py index 53844c20..0003dba1 100644 --- a/test/unit/test_image_submission.py +++ b/test/unit/test_image_submission.py @@ -4,11 +4,11 @@ from unittest import mock import pytest -from utils import make_random_jpeg from groundlight import Groundlight from groundlight.images import MAX_BYTES_IMAGE_SIZE, MAX_IMAGE_RESOLUTION_LONGSIDE from groundlight.internalapi import InternalApiError from PIL import Image +from utils import make_random_jpeg def test_submit_image_query_sends_shrunken_image(gl: Groundlight): diff --git a/test/unit/test_imagefuncs.py b/test/unit/test_imagefuncs.py index f8625393..0ae747c3 100644 --- a/test/unit/test_imagefuncs.py +++ b/test/unit/test_imagefuncs.py @@ -5,9 +5,9 @@ from io import BytesIO import pytest -from utils import make_random_jpeg from groundlight.images import * from groundlight.optional_imports import * +from utils import make_random_jpeg JPEG_MIN_SIZE = 500 From 8a2adaf069adb089815a082388bd3cc1f8bcb902 Mon Sep 17 00:00:00 2001 From: Auto-format Bot Date: Mon, 18 May 2026 16:02:01 +0000 Subject: [PATCH 13/14] Automatically reformatting code --- test/unit/test_imagefuncs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/unit/test_imagefuncs.py b/test/unit/test_imagefuncs.py index 0ae747c3..a1e7f96e 100644 --- a/test/unit/test_imagefuncs.py +++ b/test/unit/test_imagefuncs.py @@ -9,7 +9,6 @@ from groundlight.optional_imports import * from utils import make_random_jpeg - JPEG_MIN_SIZE = 500 From a05e59765216acaef18263073b402217e79e6468 Mon Sep 17 00:00:00 2001 From: Tim Huff Date: Mon, 18 May 2026 12:57:49 -0700 Subject: [PATCH 14/14] fixing typo --- test/unit/test_imagefuncs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/test_imagefuncs.py b/test/unit/test_imagefuncs.py index a1e7f96e..587b2fae 100644 --- a/test/unit/test_imagefuncs.py +++ b/test/unit/test_imagefuncs.py @@ -100,7 +100,7 @@ def test_shrink_image_if_needed_small_returns_unchanged(): def test_shrink_image_if_needed_oversized_dimensions_get_resized(): """Images above the byte threshold with longest side > 1024 are downscaled.""" - # Random noise compresses poorly, so 3000x4000 easily exceeds the 256 KB threshold. + # Random noise compresses poorly, so 4000x3000 easily exceeds the 256 KB threshold. big = make_random_jpeg(4000, 3000) assert len(big) > MAX_BYTES_IMAGE_SIZE out = shrink_image_if_needed(big)