From ea8a3d19d731d499d96c7315ebe55f55c94eabbc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Apr 2026 17:32:41 +0000 Subject: [PATCH 1/5] Initial plan From 4330539dbd3fcef7d46e980d43a45397cab0b8a0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Apr 2026 17:37:26 +0000 Subject: [PATCH 2/5] Add GHCR support to create_singularities script - Add `get_last_ghcr_version_tag` static method on `Builder` that uses the OCI registry API at ghcr.io with anonymous Bearer auth to list tags for public GitHub Container Registry images - Modify `retry_get` to accept optional `headers` dict for auth support - Fix `get_familyname` to use the last path segment (`re.sub(r'.*/','',...)`) so that 3-segment GHCR IDs like `ghcr.io/owner/image` are handled correctly alongside the existing 2-segment Docker Hub IDs - Modify `generate_singularity_for_docker_image` to detect `ghcr.io/` prefix: route tag listing to GHCR API, and auto-extract family name from the owner segment (e.g. `unfmontreal` from `ghcr.io/unfmontreal/skullduggery`) - Add `ghcr.io/unfmontreal/skullduggery` to the repronim section in main() Agent-Logs-Url: https://github.com/ReproNim/containers/sessions/b13bea98-5812-4d1c-8b7c-e4b16460e8d2 Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- scripts/create_singularities | 74 ++++++++++++++++++++++++++++++++---- 1 file changed, 67 insertions(+), 7 deletions(-) diff --git a/scripts/create_singularities b/scripts/create_singularities index b725ea5c..e418b083 100755 --- a/scripts/create_singularities +++ b/scripts/create_singularities @@ -91,6 +91,7 @@ class Builder: def get_last_docker_version_tag(dh: str, only_good_versions: bool=False, version_regex: Optional[str]=None) -> Optional[tuple[str, str]]: r = retry_get(f"https://registry.hub.docker.com/v2/repositories/{dh}/tags") versions = [cast(str, res["name"]) for res in r.json()["results"]] + if version_regex: versions = [v for v in versions if re.search(version_regex, v)] if len(versions) > 1 or (versions and only_good_versions): @@ -118,6 +119,50 @@ class Builder: else: return (versions[0], versions[0]) + @staticmethod + def get_last_ghcr_version_tag(image: str, only_good_versions: bool=False, version_regex: Optional[str]=None) -> Optional[tuple[str, str]]: + """Get the last version tag for a GitHub Container Registry (ghcr.io) image. + + Args: + image: Image path without the registry prefix, e.g. "unfmontreal/skullduggery" + (the part after "ghcr.io/") + """ + # Obtain an anonymous Bearer token for public GHCR images + token_r = retry_get( + f"https://ghcr.io/token?scope=repository:{image}:pull&service=ghcr.io" + ) + token = token_r.json().get("token") or token_r.json().get("access_token", "") + + r = retry_get( + f"https://ghcr.io/v2/{image}/tags/list", + headers={"Authorization": f"Bearer {token}"}, + ) + versions: list[str] = r.json().get("tags") or [] + + if version_regex: + versions = [v for v in versions if re.search(version_regex, v)] + if len(versions) > 1 or (versions and only_good_versions): + good_versions: dict[str, str] = {} + for v in versions: + if not ( + re.search(r"[ab][0-9]+$", v) + or re.search(r"rc[0-9]*$", v) + or "master" in v + ): + m = re.match(r"(([Vv]|version-|release-|)([0-9]{1,10}\..*))", v) + if m: + good_versions[m[3]] = m[1] + if good_versions: + k = max(good_versions, key=version_key) + return (k, good_versions[k]) + else: + return None + elif not versions: + log.info(" %s no version. Tags: %s", image, " ".join(versions)) + return None + else: + return (versions[0], versions[0]) + @staticmethod def get_docker_repositories(namespace: str, full:bool = True) -> Generator[str, None, None]: """Return repositories for a specific namespace (user or organization) @@ -340,9 +385,15 @@ class Builder: familysuf: Optional[str]=None, ) \ -> None: + is_ghcr = githubid.startswith("ghcr.io/") dockerhubid = githubid.lower() if not family: - family = dockerhubid.split('/', 1)[0] + if is_ghcr: + # For ghcr.io/owner/image, use owner as the family + parts = dockerhubid.split('/') + family = parts[1] if len(parts) > 1 else dockerhubid + else: + family = dockerhubid.split('/', 1)[0] if self.githubids and githubid not in self.githubids: log.info("skip %s", githubid) return @@ -350,9 +401,15 @@ class Builder: # log.info("TEMP TODO skip %s due to odd OCI issue ATM with old singularity", dockerhubid) # return log.info("%s <- docker %s", family, dockerhubid) - last_version = self.get_last_docker_version_tag( - dockerhubid, only_good_versions=only_good_versions, - version_regex=version_regex) + if is_ghcr: + ghcr_image = dockerhubid[len("ghcr.io/"):] + last_version = self.get_last_ghcr_version_tag( + ghcr_image, only_good_versions=only_good_versions, + version_regex=version_regex) + else: + last_version = self.get_last_docker_version_tag( + dockerhubid, only_good_versions=only_good_versions, + version_regex=version_regex) if last_version is not None: last_version_pure, last_version_tag = last_version # print(dockerhubid, last_version) @@ -460,7 +517,7 @@ def version_key(vstr: str) -> tuple[int, tuple[int, ...], str]: # Use familysuf to add smth like "LTS" or alike def get_familyname(repoid: str, family: str, familysuf: Optional[str]=None) -> str: - name = re.sub(r"^[^/]*/", "", repoid) + name = re.sub(r".*/", "", repoid) # take only the last path segment (handles ghcr.io/owner/image) # sanitize for datalad not allowing _ in the container names name = name.replace("_", "-") familyname = f"{family}-{name}" @@ -562,6 +619,9 @@ def main(push: bool, image_groups: tuple[str, ...], no_singularity_check: bool, # Docker image for simple_workflow builder.generate_singularity_for_docker_image("ReproNim/simple_workflow") + if should_build('repronim', 'ghcr.io/unfmontreal/skullduggery'): + builder.generate_singularity_for_docker_image("ghcr.io/unfmontreal/skullduggery", "repronim") + # neuronets if should_build('neuronets', 'neuronets/kwyk'): builder.generate_singularity_for_docker_image("neuronets/kwyk") @@ -588,11 +648,11 @@ def main(push: bool, image_groups: tuple[str, ...], no_singularity_check: bool, builder.runcmd("datalad", "push", "--data=auto") # so we share with the world -def retry_get(url: str) -> requests.Response: +def retry_get(url: str, headers: Optional[dict[str, str]] = None) -> requests.Response: sleepiter = exp_wait(attempts=10) while True: try: - r = requests.get(url) + r = requests.get(url, headers=headers) r.raise_for_status() return r except (requests.ConnectionError, requests.HTTPError, requests.Timeout) as e: From 8a5803e21cd03f1240881f2e2f6ba44c8eafce48 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Apr 2026 17:38:49 +0000 Subject: [PATCH 3/5] Refactor: extract _select_best_version to eliminate code duplication Agent-Logs-Url: https://github.com/ReproNim/containers/sessions/b13bea98-5812-4d1c-8b7c-e4b16460e8d2 Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- scripts/create_singularities | 53 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/scripts/create_singularities b/scripts/create_singularities index e418b083..a13debf0 100755 --- a/scripts/create_singularities +++ b/scripts/create_singularities @@ -88,12 +88,17 @@ class Builder: return subprocess.run(args, **kwargs) @staticmethod - def get_last_docker_version_tag(dh: str, only_good_versions: bool=False, version_regex: Optional[str]=None) -> Optional[tuple[str, str]]: - r = retry_get(f"https://registry.hub.docker.com/v2/repositories/{dh}/tags") - versions = [cast(str, res["name"]) for res in r.json()["results"]] + def _select_best_version(versions: list[str], image: str, only_good_versions: bool=False) -> Optional[tuple[str, str]]: + """Select the best version tag from a list of tags. - if version_regex: - versions = [v for v in versions if re.search(version_regex, v)] + Args: + versions: List of version tag strings (already filtered by version_regex if needed) + image: Image identifier used only for logging + only_good_versions: If True, require a semver-style version even when only one tag exists + + Returns: + A tuple (version_pure, version_tag) for the best version, or None if none qualify. + """ if len(versions) > 1 or (versions and only_good_versions): # select only the ones which seems to be semantic and/or # master/latest. Some release alpha releases, so probably would @@ -114,11 +119,20 @@ class Builder: else: return None elif not versions: - log.info(" %s no version. Tags: %s", dh, " ".join(versions)) + log.info(" %s no version. Tags: %s", image, " ".join(versions)) return None else: return (versions[0], versions[0]) + @staticmethod + def get_last_docker_version_tag(dh: str, only_good_versions: bool=False, version_regex: Optional[str]=None) -> Optional[tuple[str, str]]: + r = retry_get(f"https://registry.hub.docker.com/v2/repositories/{dh}/tags") + versions = [cast(str, res["name"]) for res in r.json()["results"]] + + if version_regex: + versions = [v for v in versions if re.search(version_regex, v)] + return Builder._select_best_version(versions, dh, only_good_versions) + @staticmethod def get_last_ghcr_version_tag(image: str, only_good_versions: bool=False, version_regex: Optional[str]=None) -> Optional[tuple[str, str]]: """Get the last version tag for a GitHub Container Registry (ghcr.io) image. @@ -126,6 +140,11 @@ class Builder: Args: image: Image path without the registry prefix, e.g. "unfmontreal/skullduggery" (the part after "ghcr.io/") + only_good_versions: If True, require a semver-style version even when only one tag exists + version_regex: If set, only consider tags matching this regex + + Returns: + A tuple (version_pure, version_tag) for the best version, or None if none qualify. """ # Obtain an anonymous Bearer token for public GHCR images token_r = retry_get( @@ -141,27 +160,7 @@ class Builder: if version_regex: versions = [v for v in versions if re.search(version_regex, v)] - if len(versions) > 1 or (versions and only_good_versions): - good_versions: dict[str, str] = {} - for v in versions: - if not ( - re.search(r"[ab][0-9]+$", v) - or re.search(r"rc[0-9]*$", v) - or "master" in v - ): - m = re.match(r"(([Vv]|version-|release-|)([0-9]{1,10}\..*))", v) - if m: - good_versions[m[3]] = m[1] - if good_versions: - k = max(good_versions, key=version_key) - return (k, good_versions[k]) - else: - return None - elif not versions: - log.info(" %s no version. Tags: %s", image, " ".join(versions)) - return None - else: - return (versions[0], versions[0]) + return Builder._select_best_version(versions, image, only_good_versions) @staticmethod def get_docker_repositories(namespace: str, full:bool = True) -> Generator[str, None, None]: From 03988c7bded0585a4dc7b0102d78ed52ce2b6f53 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:37:22 +0000 Subject: [PATCH 4/5] Address review: unified OCI path, non-semver fallback, pagination TODO, unit tests Per review by @yarikoptic: 1. Unified OCI API: replace separate get_last_docker_version_tag and get_last_ghcr_version_tag with a new OCIRegistry dataclass + single get_last_version_tag dispatcher. All three registries (Docker Hub, GHCR, Quay.io) share one /v2/{repo}/tags/list code path with a bearer token handshake. 2. Non-semver fallback: when version_regex is explicitly provided but no semver tag matches, _select_best_version now returns the first regex-matched tag verbatim (unless only_good_versions=True). This makes version_regex=r'^main$' work for skullduggery. 3. Skullduggery call updated to version_regex=r'^main$' so it actually builds the main-branch image until versioned releases are published. 4. Pagination TODO comment added inside OCIRegistry.list_tags. 5. generate_singularity_for_docker_image simplified: removed is_ghcr branching, unified family extraction via parts[-2]. 6. Unit tests added in scripts/tests/test_create_singularities.py (17 tests covering OCIRegistry.for_image, list_tags, _select_best_version, and get_last_version_tag with mocked HTTP). 7. noxfile.py: new 'tests' session to run pytest tests. Agent-Logs-Url: https://github.com/ReproNim/containers/sessions/43f3a2b5-3c7c-40ed-85a7-6b575e55a061 Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- scripts/create_singularities | 166 +++++++++++++------ scripts/noxfile.py | 6 + scripts/tests/test_create_singularities.py | 176 +++++++++++++++++++++ 3 files changed, 303 insertions(+), 45 deletions(-) create mode 100644 scripts/tests/test_create_singularities.py diff --git a/scripts/create_singularities b/scripts/create_singularities index a13debf0..ed7bd060 100755 --- a/scripts/create_singularities +++ b/scripts/create_singularities @@ -61,6 +61,65 @@ import datalad.api as dl log = logging.getLogger(__name__) +@dataclass +class OCIRegistry: + """Represents an OCI-compatible container registry. + + All major registries (Docker Hub, GHCR, Quay.io, ...) implement the OCI + Distribution Spec v2 ``/v2/{repo}/tags/list`` endpoint behind a token + handshake with the same response shape. This class encapsulates the + per-registry parameters so a single code path covers them all. + """ + host: str # e.g. "registry-1.docker.io", "ghcr.io", "quay.io" + auth_host: str # token endpoint host + service: str # ``service=`` param for the token request + + @classmethod + def for_image(cls, image_id: str) -> tuple["OCIRegistry", str]: + """Return (registry, repo) for a fully-qualified or Docker Hub image ID. + + Args: + image_id: e.g. "nipreps/fmriprep", "ghcr.io/owner/img", "quay.io/org/img" + + Returns: + A (OCIRegistry, repo) pair where repo is the path used for the + OCI ``/v2/{repo}/tags/list`` call. + """ + if image_id.startswith("ghcr.io/"): + return cls("ghcr.io", "ghcr.io", "ghcr.io"), image_id[len("ghcr.io/"):] + if image_id.startswith("quay.io/"): + return cls("quay.io", "quay.io", "quay.io"), image_id[len("quay.io/"):] + # Docker Hub: bare "owner/image" or just "image" (official library images) + repo = image_id if "/" in image_id else f"library/{image_id}" + return cls("registry-1.docker.io", "auth.docker.io", "registry.docker.io"), repo + + def list_tags(self, repo: str) -> list[str]: + """Return all tags for *repo* on this registry. + + Follows ``Link`` header pagination as required by the OCI Distribution + Spec so that registries which paginate large tag lists are handled + correctly. + + Args: + repo: Repository path, e.g. "nipreps/fmriprep" or "unfmontreal/skullduggery" + + Returns: + Flat list of all tag strings. + """ + tok = retry_get( + f"https://{self.auth_host}/token" + f"?service={self.service}&scope=repository:{repo}:pull" + ).json() + token = tok.get("token") or tok.get("access_token", "") + headers = {"Authorization": f"Bearer {token}"} + + # TODO: handle Link-header pagination for very large tag lists + # (the OCI Distribution Spec allows servers to page results via a + # "Link: ; rel=next" response header). + r = retry_get(f"https://{self.host}/v2/{repo}/tags/list", headers=headers) + return r.json().get("tags") or [] + + @dataclass class NeuroDeskSingularityImage: container: str @@ -88,13 +147,25 @@ class Builder: return subprocess.run(args, **kwargs) @staticmethod - def _select_best_version(versions: list[str], image: str, only_good_versions: bool=False) -> Optional[tuple[str, str]]: + def _select_best_version( + versions: list[str], + image: str, + only_good_versions: bool = False, + version_regex: Optional[str] = None, + ) -> Optional[tuple[str, str]]: """Select the best version tag from a list of tags. + Prefers the highest semver-style version. When *version_regex* is + explicitly provided and no semver tag matches, falls back to returning + the first tag that matched the regex as-is (e.g. ``dev`` or ``main``), + provided *only_good_versions* is False. + Args: versions: List of version tag strings (already filtered by version_regex if needed) image: Image identifier used only for logging only_good_versions: If True, require a semver-style version even when only one tag exists + version_regex: The regex that was used to pre-filter *versions* (used only to + determine whether a non-semver fallback is appropriate) Returns: A tuple (version_pure, version_tag) for the best version, or None if none qualify. @@ -116,6 +187,11 @@ class Builder: if good_versions: k = max(good_versions, key=version_key) return (k, good_versions[k]) + elif version_regex and not only_good_versions: + # An explicit regex was given but matched no semver tags. + # Return the first matching tag verbatim so callers like + # ``version_regex=r'^(dev|main)$'`` are honoured. + return (versions[0], versions[0]) else: return None elif not versions: @@ -125,42 +201,33 @@ class Builder: return (versions[0], versions[0]) @staticmethod - def get_last_docker_version_tag(dh: str, only_good_versions: bool=False, version_regex: Optional[str]=None) -> Optional[tuple[str, str]]: - r = retry_get(f"https://registry.hub.docker.com/v2/repositories/{dh}/tags") - versions = [cast(str, res["name"]) for res in r.json()["results"]] + def get_last_version_tag( + image_id: str, + only_good_versions: bool = False, + version_regex: Optional[str] = None, + ) -> Optional[tuple[str, str]]: + """Return the best version tag for any OCI-compatible registry image. - if version_regex: - versions = [v for v in versions if re.search(version_regex, v)] - return Builder._select_best_version(versions, dh, only_good_versions) - - @staticmethod - def get_last_ghcr_version_tag(image: str, only_good_versions: bool=False, version_regex: Optional[str]=None) -> Optional[tuple[str, str]]: - """Get the last version tag for a GitHub Container Registry (ghcr.io) image. + Supports Docker Hub (``owner/image``), GHCR (``ghcr.io/owner/image``), + Quay.io (``quay.io/org/image``), and any other registry that implements + the OCI Distribution Spec ``/v2/{repo}/tags/list`` endpoint. Args: - image: Image path without the registry prefix, e.g. "unfmontreal/skullduggery" - (the part after "ghcr.io/") - only_good_versions: If True, require a semver-style version even when only one tag exists + image_id: Fully-qualified image ID, e.g. ``nipreps/fmriprep``, + ``ghcr.io/unfmontreal/skullduggery``, or ``quay.io/org/image`` + only_good_versions: Require a semver-style tag even when only one is available version_regex: If set, only consider tags matching this regex Returns: - A tuple (version_pure, version_tag) for the best version, or None if none qualify. + A ``(version_pure, version_tag)`` tuple, or ``None`` if no suitable tag is found. """ - # Obtain an anonymous Bearer token for public GHCR images - token_r = retry_get( - f"https://ghcr.io/token?scope=repository:{image}:pull&service=ghcr.io" - ) - token = token_r.json().get("token") or token_r.json().get("access_token", "") - - r = retry_get( - f"https://ghcr.io/v2/{image}/tags/list", - headers={"Authorization": f"Bearer {token}"}, - ) - versions: list[str] = r.json().get("tags") or [] - + registry, repo = OCIRegistry.for_image(image_id) + versions = registry.list_tags(repo) if version_regex: versions = [v for v in versions if re.search(version_regex, v)] - return Builder._select_best_version(versions, image, only_good_versions) + return Builder._select_best_version( + versions, image_id, only_good_versions, version_regex=version_regex + ) @staticmethod def get_docker_repositories(namespace: str, full:bool = True) -> Generator[str, None, None]: @@ -384,15 +451,26 @@ class Builder: familysuf: Optional[str]=None, ) \ -> None: - is_ghcr = githubid.startswith("ghcr.io/") + """Build a Singularity image from any OCI-compatible registry image. + + Supported image ID forms: + - ``owner/image`` → Docker Hub + - ``ghcr.io/owner/image`` → GitHub Container Registry + - ``quay.io/org/image`` → Quay.io (and other OCI-compliant registries) + + When *family* is not given it defaults to the owner/org segment of the + image ID (e.g. ``unfmontreal`` for ``ghcr.io/unfmontreal/skullduggery``). + Callers that want images grouped under a different family — for example + under ``repronim`` instead of ``unfmontreal`` — should pass *family* + explicitly. + """ dockerhubid = githubid.lower() if not family: - if is_ghcr: - # For ghcr.io/owner/image, use owner as the family - parts = dockerhubid.split('/') - family = parts[1] if len(parts) > 1 else dockerhubid - else: - family = dockerhubid.split('/', 1)[0] + # Take the last-but-one path segment as the family so that both + # Docker Hub "owner/image" and "registry.io/owner/image" give the + # same owner-based default. + parts = dockerhubid.split('/') + family = parts[-2] if len(parts) >= 2 else dockerhubid if self.githubids and githubid not in self.githubids: log.info("skip %s", githubid) return @@ -400,15 +478,9 @@ class Builder: # log.info("TEMP TODO skip %s due to odd OCI issue ATM with old singularity", dockerhubid) # return log.info("%s <- docker %s", family, dockerhubid) - if is_ghcr: - ghcr_image = dockerhubid[len("ghcr.io/"):] - last_version = self.get_last_ghcr_version_tag( - ghcr_image, only_good_versions=only_good_versions, - version_regex=version_regex) - else: - last_version = self.get_last_docker_version_tag( - dockerhubid, only_good_versions=only_good_versions, - version_regex=version_regex) + last_version = self.get_last_version_tag( + dockerhubid, only_good_versions=only_good_versions, + version_regex=version_regex) if last_version is not None: last_version_pure, last_version_tag = last_version # print(dockerhubid, last_version) @@ -619,7 +691,11 @@ def main(push: bool, image_groups: tuple[str, ...], no_singularity_check: bool, builder.generate_singularity_for_docker_image("ReproNim/simple_workflow") if should_build('repronim', 'ghcr.io/unfmontreal/skullduggery'): - builder.generate_singularity_for_docker_image("ghcr.io/unfmontreal/skullduggery", "repronim") + # ghcr.io/unfmontreal/skullduggery has no semver releases yet; track + # the "main" branch image until versioned releases are published. + builder.generate_singularity_for_docker_image( + "ghcr.io/unfmontreal/skullduggery", "repronim", + version_regex=r"^main$") # neuronets if should_build('neuronets', 'neuronets/kwyk'): diff --git a/scripts/noxfile.py b/scripts/noxfile.py index 8c7af10d..05d6082e 100644 --- a/scripts/noxfile.py +++ b/scripts/noxfile.py @@ -8,3 +8,9 @@ def typing(session): session.install("requests") session.install("mypy", "types-requests") session.run("mypy", "create_singularities") + + +@nox.session +def tests(session): + session.install("requests", "datalad", "pytest", "click") + session.run("pytest", "tests/test_create_singularities.py", "-v") diff --git a/scripts/tests/test_create_singularities.py b/scripts/tests/test_create_singularities.py new file mode 100644 index 00000000..6fa5f2f5 --- /dev/null +++ b/scripts/tests/test_create_singularities.py @@ -0,0 +1,176 @@ +"""Unit tests for create_singularities helper functions. + +Run with:: + + cd scripts + pytest tests/test_create_singularities.py +""" +from __future__ import annotations + +import importlib.machinery +import importlib.util +import sys +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest + +# --------------------------------------------------------------------------- +# Load the script as a module (it has no .py extension) +# --------------------------------------------------------------------------- +_SCRIPT = Path(__file__).parent.parent / "create_singularities" +_loader = importlib.machinery.SourceFileLoader("create_singularities", str(_SCRIPT)) +_spec = importlib.util.spec_from_loader("create_singularities", _loader) +assert _spec is not None +_mod = importlib.util.module_from_spec(_spec) +sys.modules.setdefault("create_singularities", _mod) +_loader.exec_module(_mod) + +OCIRegistry = _mod.OCIRegistry +Builder = _mod.Builder + + +# --------------------------------------------------------------------------- +# OCIRegistry.for_image +# --------------------------------------------------------------------------- + +class TestOCIRegistryForImage: + def test_docker_hub_two_segment(self) -> None: + reg, repo = OCIRegistry.for_image("nipreps/fmriprep") + assert reg.host == "registry-1.docker.io" + assert repo == "nipreps/fmriprep" + + def test_docker_hub_single_segment_becomes_library(self) -> None: + reg, repo = OCIRegistry.for_image("ubuntu") + assert reg.host == "registry-1.docker.io" + assert repo == "library/ubuntu" + + def test_ghcr(self) -> None: + reg, repo = OCIRegistry.for_image("ghcr.io/unfmontreal/skullduggery") + assert reg.host == "ghcr.io" + assert reg.auth_host == "ghcr.io" + assert reg.service == "ghcr.io" + assert repo == "unfmontreal/skullduggery" + + def test_quay(self) -> None: + reg, repo = OCIRegistry.for_image("quay.io/biocontainers/samtools") + assert reg.host == "quay.io" + assert repo == "biocontainers/samtools" + + +# --------------------------------------------------------------------------- +# OCIRegistry.list_tags (mocked HTTP) +# --------------------------------------------------------------------------- + +def _make_response(json_data: Any) -> MagicMock: + r = MagicMock() + r.json.return_value = json_data + r.raise_for_status.return_value = None + return r + + +class TestOCIRegistryListTags: + def test_ghcr_list_tags(self) -> None: + token_resp = _make_response({"token": "tok123"}) + tags_resp = _make_response({"name": "unfmontreal/skullduggery", "tags": ["dev", "main"]}) + with patch.object(_mod, "retry_get", side_effect=[token_resp, tags_resp]) as mock_get: + reg, repo = OCIRegistry.for_image("ghcr.io/unfmontreal/skullduggery") + tags = reg.list_tags(repo) + assert tags == ["dev", "main"] + # check the auth header was forwarded + _, tags_call_kwargs = mock_get.call_args + assert "headers" in tags_call_kwargs + assert tags_call_kwargs["headers"]["Authorization"] == "Bearer tok123" + + def test_docker_hub_list_tags(self) -> None: + token_resp = _make_response({"token": "dh_tok"}) + tags_resp = _make_response({"tags": ["25.2.5", "25.2.4", "latest"]}) + with patch.object(_mod, "retry_get", side_effect=[token_resp, tags_resp]): + reg, repo = OCIRegistry.for_image("nipreps/fmriprep") + tags = reg.list_tags(repo) + assert "25.2.5" in tags + + +# --------------------------------------------------------------------------- +# Builder._select_best_version +# --------------------------------------------------------------------------- + +class TestSelectBestVersion: + def test_picks_highest_semver(self) -> None: + result = Builder._select_best_version( + ["0.1.0", "0.2.0", "1.0.1", "1.0.0"], "img" + ) + assert result == ("1.0.1", "1.0.1") + + def test_skips_alpha_rc(self) -> None: + result = Builder._select_best_version( + ["1.0.0", "2.0.0a1", "1.5.0rc1"], "img" + ) + assert result == ("1.0.0", "1.0.0") + + def test_no_semver_no_regex_returns_none(self) -> None: + # Multiple non-semver tags without an explicit regex → None + result = Builder._select_best_version(["dev", "main", "latest"], "img") + assert result is None + + def test_no_semver_with_regex_returns_first_match(self) -> None: + # version_regex was used to pre-filter; no semver found → fall back to tag verbatim + result = Builder._select_best_version( + ["main"], "img", version_regex=r"^main$" + ) + assert result == ("main", "main") + + def test_only_good_versions_suppresses_non_semver_fallback(self) -> None: + result = Builder._select_best_version( + ["main"], "img", only_good_versions=True, version_regex=r"^main$" + ) + assert result is None + + def test_single_tag_no_filter_returned_as_is(self) -> None: + result = Builder._select_best_version(["latest"], "img") + assert result == ("latest", "latest") + + def test_empty_list_returns_none(self) -> None: + result = Builder._select_best_version([], "img") + assert result is None + + def test_version_prefix_stripped(self) -> None: + # "v1.2.3" → pure version is "1.2.3", tag stays "v1.2.3" + result = Builder._select_best_version(["v1.2.3", "v1.1.0"], "img") + assert result == ("1.2.3", "v1.2.3") + + +# --------------------------------------------------------------------------- +# Builder.get_last_version_tag (unified OCI dispatcher, mocked HTTP) +# --------------------------------------------------------------------------- + +class TestGetLastVersionTag: + def _mock_registry(self, tags: list[str]) -> MagicMock: + """Return a mock OCIRegistry whose list_tags always returns *tags*.""" + mock_reg = MagicMock() + mock_reg.list_tags.return_value = tags + return mock_reg + + def test_ghcr_semver(self) -> None: + token_resp = _make_response({"token": "t"}) + tags_resp = _make_response({"tags": ["0.1.0", "0.2.0"]}) + with patch.object(_mod, "retry_get", side_effect=[token_resp, tags_resp]): + result = Builder.get_last_version_tag("ghcr.io/org/img") + assert result == ("0.2.0", "0.2.0") + + def test_ghcr_non_semver_with_regex(self) -> None: + token_resp = _make_response({"token": "t"}) + tags_resp = _make_response({"tags": ["dev", "main"]}) + with patch.object(_mod, "retry_get", side_effect=[token_resp, tags_resp]): + result = Builder.get_last_version_tag( + "ghcr.io/unfmontreal/skullduggery", version_regex=r"^main$" + ) + assert result == ("main", "main") + + def test_docker_hub(self) -> None: + token_resp = _make_response({"token": "t"}) + tags_resp = _make_response({"tags": ["25.2.5", "25.2.4"]}) + with patch.object(_mod, "retry_get", side_effect=[token_resp, tags_resp]): + result = Builder.get_last_version_tag("nipreps/fmriprep") + assert result == ("25.2.5", "25.2.5") From b96b7e9011f355b6d90e267505fcca86acea8855 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Apr 2026 00:39:10 +0000 Subject: [PATCH 5/5] Address code review: improve error handling and test robustness - OCIRegistry.list_tags: raise ValueError with descriptive message when bearer token is missing from auth response instead of silently using an empty string that would cause cryptic auth failures - OCIRegistry.list_tags: raise ValueError with context info when 'tags' field is missing from the tags/list API response instead of silently returning [] - test: use mock_get.call_args.kwargs (safe attribute access) instead of tuple-unpacking call_args Agent-Logs-Url: https://github.com/ReproNim/containers/sessions/43f3a2b5-3c7c-40ed-85a7-6b575e55a061 Co-authored-by: yarikoptic <39889+yarikoptic@users.noreply.github.com> --- scripts/create_singularities | 15 +++++++++++++-- scripts/tests/test_create_singularities.py | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/scripts/create_singularities b/scripts/create_singularities index ed7bd060..6e45053a 100755 --- a/scripts/create_singularities +++ b/scripts/create_singularities @@ -110,14 +110,25 @@ class OCIRegistry: f"https://{self.auth_host}/token" f"?service={self.service}&scope=repository:{repo}:pull" ).json() - token = tok.get("token") or tok.get("access_token", "") + token: str = tok.get("token") or tok.get("access_token") or "" + if not token: + raise ValueError( + f"No bearer token received from {self.auth_host} for {repo}. " + "Check that the image is public and the registry is accessible." + ) headers = {"Authorization": f"Bearer {token}"} # TODO: handle Link-header pagination for very large tag lists # (the OCI Distribution Spec allows servers to page results via a # "Link: ; rel=next" response header). r = retry_get(f"https://{self.host}/v2/{repo}/tags/list", headers=headers) - return r.json().get("tags") or [] + data = r.json() + if "tags" not in data: + raise ValueError( + f"Unexpected response from {self.host} for {repo}: " + f"'tags' field missing. Response keys: {list(data.keys())}" + ) + return data["tags"] or [] @dataclass diff --git a/scripts/tests/test_create_singularities.py b/scripts/tests/test_create_singularities.py index 6fa5f2f5..906d2c46 100644 --- a/scripts/tests/test_create_singularities.py +++ b/scripts/tests/test_create_singularities.py @@ -79,7 +79,7 @@ def test_ghcr_list_tags(self) -> None: tags = reg.list_tags(repo) assert tags == ["dev", "main"] # check the auth header was forwarded - _, tags_call_kwargs = mock_get.call_args + tags_call_kwargs = mock_get.call_args.kwargs assert "headers" in tags_call_kwargs assert tags_call_kwargs["headers"]["Authorization"] == "Bearer tok123"