diff --git a/docs/source/index.rst b/docs/source/index.rst index 2139f1665..b9c30122f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -100,6 +100,9 @@ the requirements that are currently supported by Macaron. * - ``mcn_license_1`` - **License check** - Check whether the repository license is not in the configured deny-list. - This check detects the repository license via the GitHub API and validates it against a user-defined list of denied SPDX identifiers configured in ``defaults.ini``. If the deny-list is empty, any detected license is accepted. + * - ``mcn_registry_maintainability_1`` + - **Registry maintainability** - Check whether the package exists in its public registry and is actively maintained. + - This check validates registry presence, release recency, and explicit deprecation or yanked status (PyPI, npm). Also checks whether the source repository has been archived and how recently code was pushed, when a GitHub repository is available. The inactivity threshold is configurable via ``defaults.ini`` (``[registry_maintainability] inactivity_threshold_days``). Returns ``UNKNOWN`` when the ecosystem is unsupported or no version is specified in the PURL. ---------------------- How does Macaron work? diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index 543da38eb..43eb61650 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -774,3 +774,15 @@ denied_licenses = # If True, the check fails when no license is detected. # If False, a missing license results in a low-confidence pass. require_license = False + +# Registry maintainability configuration. +# The mcn_registry_maintainability_1 check validates whether a package exists +# in its public registry and evaluates its maintenance status. +[registry_maintainability] + +# Maximum number of days since the last release before a package is considered +# unmaintained. Packages exceeding this threshold will fail the check. 
+# The same threshold is also applied to the time since the last push to the +# source repository (if available via the GitHub API). +# Default: 365 days (approximately 1 year). +inactivity_threshold_days = 365 diff --git a/src/macaron/slsa_analyzer/checks/registry_maintainability_check.py b/src/macaron/slsa_analyzer/checks/registry_maintainability_check.py new file mode 100644 index 000000000..da986cb45 --- /dev/null +++ b/src/macaron/slsa_analyzer/checks/registry_maintainability_check.py @@ -0,0 +1,538 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This check validates whether a package exists in its public registry and is actively maintained.""" + +import logging +import urllib.parse +from datetime import datetime, timezone + +from packageurl import PackageURL +from sqlalchemy import Boolean, ForeignKey, Integer, String +from sqlalchemy.orm import Mapped, mapped_column + +from macaron.config.defaults import defaults +from macaron.database.table_definitions import CheckFacts +from macaron.errors import InvalidHTTPResponseError +from macaron.json_tools import json_extract +from macaron.slsa_analyzer.analyze_context import AnalyzeContext +from macaron.slsa_analyzer.checks.base_check import BaseCheck +from macaron.slsa_analyzer.checks.check_result import ( + CheckResultData, + CheckResultType, + Confidence, + JustificationType, +) +from macaron.slsa_analyzer.git_service.github import GitHub +from macaron.slsa_analyzer.package_registry.npm_registry import NPMRegistry, find_or_create_npm_asset +from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIRegistry, find_or_create_pypi_asset +from macaron.slsa_analyzer.registry import registry +from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo + +logger: logging.Logger = logging.getLogger(__name__) + +_REMEDIATION_GENERIC = ( + 
_REMEDIATION_GENERIC = (
    "Consider replacing or reviewing this dependency as it may no longer be actively maintained."
)
_REMEDIATION_DEPRECATED = (
    "This package has been explicitly deprecated or removed. Consider replacing this dependency."
)
_REMEDIATION_ARCHIVED = (
    "The source repository has been archived and is no longer accepting contributions."
    " Consider replacing this dependency."
)


class RegistryMaintainabilityFacts(CheckFacts):
    """The ORM mapping for justifications in the registry maintainability check."""

    __tablename__ = "_registry_maintainability_check"

    #: The primary key.
    id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True)  # noqa: A003

    #: The name of the matched package registry, derived from the registry class name.
    registry_name: Mapped[str | None] = mapped_column(
        String,
        nullable=True,
        info={"justification": JustificationType.TEXT},
    )

    #: A human-facing link to the package page on the registry.
    registry_url: Mapped[str | None] = mapped_column(
        String,
        nullable=True,
        info={"justification": JustificationType.HREF},
    )

    #: A link to the source repository (GitHub), if available.
    repository_url: Mapped[str | None] = mapped_column(
        String,
        nullable=True,
        info={"justification": JustificationType.HREF},
    )

    #: Date string of the most recent release of the package (across all versions).
    last_release_date: Mapped[str | None] = mapped_column(
        String,
        nullable=True,
        info={"justification": JustificationType.TEXT},
    )

    #: Number of days elapsed since the most recent release of the package (across all versions).
    days_since_release: Mapped[int | None] = mapped_column(
        Integer,
        nullable=True,
        info={"justification": JustificationType.TEXT},
    )

    #: Whether the package version is explicitly deprecated or yanked.
    is_deprecated: Mapped[bool | None] = mapped_column(
        Boolean,
        nullable=True,
        info={"justification": JustificationType.TEXT},
    )

    #: Human-readable reason provided by the registry for the deprecation or yank.
    deprecation_reason: Mapped[str | None] = mapped_column(
        String,
        nullable=True,
        info={"justification": JustificationType.TEXT},
    )

    #: Whether the source repository is archived (GitHub only).
    is_archived: Mapped[bool | None] = mapped_column(
        Boolean,
        nullable=True,
        info={"justification": JustificationType.TEXT},
    )

    #: Date string of the most recent push to the source repository (GitHub only).
    last_commit_date: Mapped[str | None] = mapped_column(
        String,
        nullable=True,
        info={"justification": JustificationType.TEXT},
    )

    #: Number of days elapsed since the most recent push to the source repository (GitHub only).
    days_since_commit: Mapped[int | None] = mapped_column(
        Integer,
        nullable=True,
        info={"justification": JustificationType.TEXT},
    )

    #: Suggested remediation action for the user.
    remediation: Mapped[str | None] = mapped_column(
        String,
        nullable=True,
        info={"justification": JustificationType.TEXT},
    )

    __mapper_args__ = {
        "polymorphic_identity": "_registry_maintainability_check",
    }


def _to_utc(moment: datetime) -> datetime:
    """Return ``moment`` as a timezone-aware UTC datetime.

    Subtracting a naive datetime from an aware one raises ``TypeError``, so every
    timestamp obtained from a registry or from GitHub is normalised before it is
    compared with ``datetime.now(timezone.utc)``.

    Parameters
    ----------
    moment : datetime
        The timestamp to normalise. Naive values are assumed to already be in UTC.

    Returns
    -------
    datetime
        The equivalent timezone-aware timestamp expressed in UTC.
    """
    if moment.tzinfo is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment.astimezone(timezone.utc)


def _deprecation_remediation(reason: str | None) -> str:
    """Build the remediation message for a deprecated or yanked package.

    Parameters
    ----------
    reason : str | None
        The reason reported by the registry, if any.

    Returns
    -------
    str
        The generic deprecation remediation, followed by the reason when one is available.
    """
    # The base message already ends with a full stop, so the reason is appended as
    # a separate sentence instead of being glued on with ":" or an extra ".".
    if reason:
        return f"{_REMEDIATION_DEPRECATED} Reason: {reason}"
    return _REMEDIATION_DEPRECATED


def _build_registry_url(
    registry_info: PackageRegistryInfo, name: str, namespace: str | None, version: str
) -> str | None:
    """Build a human-facing package page URL for the given registry and package coordinates.

    Parameters
    ----------
    registry_info : PackageRegistryInfo
        The matched package registry information.
    name : str
        The package name.
    namespace : str | None
        The package namespace (used for scoped npm packages, e.g. ``@scope``).
    version : str
        The package version.

    Returns
    -------
    str | None
        The human-facing URL, or ``None`` if the registry type is unsupported
        (or, for PyPI, when no registry URL is configured).
    """
    pkg_registry = registry_info.package_registry

    if isinstance(pkg_registry, PyPIRegistry) and pkg_registry.registry_url:
        return urllib.parse.urljoin(pkg_registry.registry_url, f"project/{name}/{version}/")

    if isinstance(pkg_registry, NPMRegistry):
        package_name = f"{namespace}/{name}" if namespace else name
        return f"https://www.npmjs.com/package/{package_name}/v/{version}"

    return None


def _check_deprecated(
    registry_info: PackageRegistryInfo,
    name: str,
    namespace: str | None,
    version: str,
) -> tuple[bool | None, str | None]:
    """Check whether the package version is explicitly deprecated or yanked.

    The check is ecosystem-specific:

    * **PyPI**: inspects the ``yanked`` flag of every file listed for the
      specific version (``releases[version][i]["yanked"]``).
    * **npm**: inspects the top-level ``deprecated`` field in the version
      manifest returned by the registry.
    * **Other ecosystems**: returns ``(None, None)`` — signal not available.

    Parameters
    ----------
    registry_info : PackageRegistryInfo
        The matched package registry information.
    name : str
        The package name.
    namespace : str | None
        The package namespace (used for scoped npm packages).
    version : str
        The package version.

    Returns
    -------
    tuple[bool | None, str | None]
        A tuple ``(is_deprecated, reason)``. Both values are ``None`` when the
        signal is not available, i.e. the ecosystem is unsupported, the package
        metadata could not be obtained, or (PyPI) the metadata has no entry for
        the requested version.
    """
    pkg_registry = registry_info.package_registry

    if isinstance(pkg_registry, PyPIRegistry):
        pypi_asset = find_or_create_pypi_asset(name, version, registry_info)
        if pypi_asset is None:
            logger.debug("Could not obtain PyPI package JSON asset for %s@%s.", name, version)
            return None, None

        if not (pypi_asset.package_json or pypi_asset.download(dest="")):
            logger.debug("Failed to download PyPI package JSON for %s@%s.", name, version)
            return None, None

        # The package-level endpoint stores per-version file info under ``releases``.
        version_files = json_extract(pypi_asset.package_json, ["releases", version], list)
        if version_files is None:
            # Without an entry for this version we cannot claim that it is *not* yanked.
            logger.debug("No release metadata found for PyPI package %s@%s.", name, version)
            return None, None

        yanked_files = [
            file_info for file_info in version_files if isinstance(file_info, dict) and file_info.get("yanked")
        ]
        if not yanked_files:
            return False, None

        # Report the first non-empty reason attached to a yanked file, if any.
        yanked_reason: str | None = next(
            (file_info["yanked_reason"] for file_info in yanked_files if file_info.get("yanked_reason")),
            None,
        )
        return True, yanked_reason

    if isinstance(pkg_registry, NPMRegistry):
        npm_asset = find_or_create_npm_asset(name, namespace, version, registry_info)
        if npm_asset is None:
            logger.debug("Could not obtain npm package JSON asset for %s@%s.", name, version)
            return None, None

        if not (npm_asset.package_json or npm_asset.download(dest="")):
            logger.debug("Failed to download npm package JSON for %s@%s.", name, version)
            return None, None

        deprecated_msg = npm_asset.package_json.get("deprecated")
        if deprecated_msg:
            return True, str(deprecated_msg)
        return False, None

    # Maven Central and other ecosystems do not expose a standard deprecation flag.
    return None, None


def _get_latest_release_timestamp(
    registry_info: PackageRegistryInfo,
    name: str,
    namespace: str | None,
    version: str,
) -> datetime | None:
    """Return the publish timestamp of the *latest* release of the package.

    This is used for the release-recency signal so that a pinned old version of
    an actively maintained package is not incorrectly flagged as unmaintained.

    For PyPI the upload time of the latest release is read from the package
    JSON asset. For npm the latest version is resolved via the registry and its
    publish timestamp is then looked up with ``find_publish_timestamp``.

    Parameters
    ----------
    registry_info : PackageRegistryInfo
        The matched package registry information.
    name : str
        The package name.
    namespace : str | None
        The package namespace (used for scoped npm packages).
    version : str
        The specific version of the analysed PURL, used only as a cache key
        when fetching the PyPI asset.

    Returns
    -------
    datetime | None
        The timezone-aware (UTC) publish timestamp of the latest release, or
        ``None`` if it cannot be determined.
    """
    pkg_registry = registry_info.package_registry

    if isinstance(pkg_registry, PyPIRegistry):
        pypi_asset = find_or_create_pypi_asset(name, version, registry_info)
        if pypi_asset is None:
            return None
        if not (pypi_asset.package_json or pypi_asset.download(dest="")):
            return None
        upload_time_str = pypi_asset.get_latest_release_upload_time()
        if upload_time_str:
            try:
                # PyPI upload_time strings use "%Y-%m-%dT%H:%M:%S" (no tz suffix); assume UTC.
                return _to_utc(datetime.strptime(upload_time_str, "%Y-%m-%dT%H:%M:%S"))
            except ValueError:
                logger.debug("Could not parse PyPI latest release upload time %r.", upload_time_str)
        return None

    if isinstance(pkg_registry, NPMRegistry):
        latest_version = pkg_registry.get_latest_version(namespace, name)
        if latest_version is None:
            logger.debug("Could not determine latest version for npm package %s.", name)
            return None
        latest_purl = str(PackageURL(type="npm", namespace=namespace, name=name, version=latest_version))
        try:
            return _to_utc(pkg_registry.find_publish_timestamp(latest_purl))
        except InvalidHTTPResponseError as error:
            logger.debug("Could not retrieve latest release timestamp for npm package %s: %s", name, error)
            return None

    return None


class RegistryMaintainabilityCheck(BaseCheck):
    """Check whether a package exists in its public registry and is actively maintained.

    The check evaluates three independent signals when available:

    1. **Registry presence and release recency** — the package must be found on
       its expected public registry, and the most recent release must fall within
       the configured inactivity threshold (``inactivity_threshold_days``).
    2. **Deprecated / yanked status** — PyPI yanked releases and npm deprecated
       packages cause an immediate failure regardless of release age.
    3. **Source repository archived status and commit recency** — when the
       component's source repository is hosted on GitHub, the check also
       inspects whether the repository has been archived and how recently code
       was pushed.

    When several signals fail, the reported remediation follows this precedence:
    archived repository, deprecated/yanked version, stale release, stale repository.

    The check returns ``UNKNOWN`` when it cannot determine a result (e.g.
    unsupported ecosystem, no version in PURL, or an API error).
    """

    def __init__(self) -> None:
        """Initialize the check instance."""
        check_id = "mcn_registry_maintainability_1"
        description = "Check if the package exists in its expected public registry and is actively maintained."
        super().__init__(check_id=check_id, description=description)

    def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
        """Run the registry maintainability check.

        Parameters
        ----------
        ctx : AnalyzeContext
            The object containing processed data for the target component.

        Returns
        -------
        CheckResultData
            The result of the check.
        """
        # A specific version is required to query the registry.
        if not ctx.component.version:
            logger.debug(
                "Skipping %s: no version found in PURL %s.",
                self.check_info.check_id,
                ctx.component.purl,
            )
            return CheckResultData(
                result_tables=[
                    RegistryMaintainabilityFacts(
                        remediation="Cannot determine registry status: the PURL does not include a specific version.",
                        confidence=Confidence.LOW,
                    )
                ],
                result_type=CheckResultType.UNKNOWN,
            )

        # Iterate over all registries to find one that matches the component ecosystem
        # and can return a publish timestamp. Registries that raise NotImplementedError
        # (e.g. Maven Central) or InvalidHTTPResponseError are skipped.
        registry_infos: list[PackageRegistryInfo] = ctx.dynamic_data["package_registries"]
        matched_registry_info: PackageRegistryInfo | None = None
        publish_dt: datetime | None = None

        for _registry_info in registry_infos:
            if _registry_info.ecosystem != ctx.component.type:
                continue
            try:
                publish_dt = _registry_info.package_registry.find_publish_timestamp(ctx.component.purl)
                matched_registry_info = _registry_info
                break
            except InvalidHTTPResponseError as error:
                logger.debug(
                    "Could not retrieve publish timestamp for %s: %s",
                    ctx.component.purl,
                    error,
                )
            except NotImplementedError:
                continue

        if matched_registry_info is None or publish_dt is None:
            logger.debug(
                "Skipping %s: no matching package registry found for PURL %s.",
                self.check_info.check_id,
                ctx.component.purl,
            )
            return CheckResultData(
                result_tables=[
                    RegistryMaintainabilityFacts(
                        remediation=(
                            "No supported package registry found for this ecosystem "
                            "or the registry API is currently unavailable."
                        ),
                        confidence=Confidence.LOW,
                    )
                ],
                result_type=CheckResultType.UNKNOWN,
            )

        registry_info = matched_registry_info
        pkg_registry = registry_info.package_registry
        registry_name: str = type(pkg_registry).__name__.replace("Registry", "")

        # Extract namespace from the PURL once for reuse across signals.
        parsed_purl = PackageURL.from_string(ctx.component.purl)
        namespace: str | None = parsed_purl.namespace

        now = datetime.now(timezone.utc)

        # Use latest release date of the package for the recency signal, falling back
        # to the publish date of the analysed version. Normalise to UTC so that the
        # subtraction below cannot mix naive and aware datetimes.
        latest_publish_dt = _get_latest_release_timestamp(
            registry_info, ctx.component.name, namespace, ctx.component.version
        )
        recency_dt = _to_utc(latest_publish_dt if latest_publish_dt is not None else publish_dt)
        days_since_release: int = (now - recency_dt).days
        last_release_date: str = recency_dt.strftime("%Y-%m-%d")

        # Check for explicit deprecation/yanked flag.
        is_deprecated, deprecation_reason = _check_deprecated(
            registry_info,
            ctx.component.name,
            namespace,
            ctx.component.version,
        )

        # Retrieve GitHub signals (archived status + last commit).
        is_archived: bool | None = None
        last_commit_date: str | None = None
        days_since_commit: int | None = None
        repository_url: str | None = None

        git_service = ctx.dynamic_data.get("git_service")
        if isinstance(git_service, GitHub) and ctx.component.repository:
            repo = ctx.component.repository
            full_name = repo.complete_name.removeprefix("github.com/")
            repo_data = git_service.api_client.get_repo_data(full_name)

            if repo_data:
                is_archived = bool(repo_data.get("archived", False))
                pushed_at = repo_data.get("pushed_at")
                if isinstance(pushed_at, str) and pushed_at:
                    # GitHub timestamps use the ``Z`` suffix; normalise for
                    # datetime.fromisoformat() on Python < 3.11.
                    try:
                        commit_dt = _to_utc(datetime.fromisoformat(pushed_at.replace("Z", "+00:00")))
                        days_since_commit = (now - commit_dt).days
                        last_commit_date = commit_dt.strftime("%Y-%m-%d")
                    except ValueError:
                        logger.debug(
                            "Could not parse pushed_at timestamp %r for %s; skipping commit signal.",
                            pushed_at,
                            ctx.component.purl,
                        )
                repository_url = f"https://github.com/{full_name}"
        else:
            logger.debug(
                "GitHub signals not available for %s: git service is not GitHub or no repository.",
                ctx.component.purl,
            )

        # Determine result based on collected signals.
        threshold: int = defaults.getint("registry_maintainability", "inactivity_threshold_days", fallback=365)

        registry_url = _build_registry_url(registry_info, ctx.component.name, namespace, ctx.component.version)

        result_type: CheckResultType
        remediation: str | None

        if is_archived:
            result_type = CheckResultType.FAILED
            remediation = _REMEDIATION_ARCHIVED
        elif is_deprecated:
            result_type = CheckResultType.FAILED
            remediation = _deprecation_remediation(deprecation_reason)
        elif days_since_release > threshold:
            result_type = CheckResultType.FAILED
            remediation = _REMEDIATION_GENERIC
        elif days_since_commit is not None and days_since_commit > threshold:
            result_type = CheckResultType.FAILED
            remediation = _REMEDIATION_GENERIC
        else:
            result_type = CheckResultType.PASSED
            remediation = None

        # Confidence is HIGH when at least one definitive signal beyond the release
        # date was obtained (GitHub archived flag, GitHub push date, or the registry
        # deprecation flag). Downgrade to MEDIUM when only the release date is known.
        if is_archived is not None or days_since_commit is not None or is_deprecated is not None:
            confidence = Confidence.HIGH
        else:
            confidence = Confidence.MEDIUM

        return CheckResultData(
            result_tables=[
                RegistryMaintainabilityFacts(
                    registry_name=registry_name,
                    registry_url=registry_url,
                    repository_url=repository_url,
                    last_release_date=last_release_date,
                    days_since_release=days_since_release,
                    is_deprecated=is_deprecated,
                    deprecation_reason=deprecation_reason,
                    is_archived=is_archived,
                    last_commit_date=last_commit_date,
                    days_since_commit=days_since_commit,
                    remediation=remediation,
                    confidence=confidence,
                )
            ],
            result_type=result_type,
        )


registry.register(RegistryMaintainabilityCheck())
+ +apply_policy_to("registry-maintainability", component_id) :- + is_component(component_id, "pkg:npm/request@2.88.2"). diff --git a/tests/integration/cases/npm_request/policy_report.json b/tests/integration/cases/npm_request/policy_report.json new file mode 100644 index 000000000..1bf2dd9de --- /dev/null +++ b/tests/integration/cases/npm_request/policy_report.json @@ -0,0 +1,16 @@ +{ + "passed_policies": [], + "component_satisfies_policy": [], + "failed_policies": [ + [ + "registry-maintainability" + ] + ], + "component_violates_policy": [ + [ + "1", + "pkg:npm/request@2.88.2", + "registry-maintainability" + ] + ] +} diff --git a/tests/integration/cases/npm_request/test.yaml b/tests/integration/cases/npm_request/test.yaml new file mode 100644 index 000000000..6f1db6b10 --- /dev/null +++ b/tests/integration/cases/npm_request/test.yaml @@ -0,0 +1,29 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Analyzing a deprecated npm package to verify that mcn_registry_maintainability_1 fails + when a package has been explicitly marked as deprecated on the npm registry. + +tags: +- macaron-python-package +- npm-registry-testcase + +steps: +- name: Run macaron analyze on deprecated npm request package. + kind: analyze + options: + command_args: + - -purl + - pkg:npm/request@2.88.2 +- name: Run macaron verify-policy to check maintainability fails. + kind: verify + options: + policy: policy.dl + expect_fail: true +- name: Compare verify policy result. 
+ kind: compare + options: + kind: policy_report + result: output/policy_report.json + expected: policy_report.json diff --git a/tests/integration/cases/pypi_aiohttp/policy.dl b/tests/integration/cases/pypi_aiohttp/policy.dl new file mode 100644 index 000000000..48a686390 --- /dev/null +++ b/tests/integration/cases/pypi_aiohttp/policy.dl @@ -0,0 +1,10 @@ +/* Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy("registry-maintainability", component_id, "Require package to be actively maintained.") :- + check_passed(component_id, "mcn_registry_maintainability_1"). + +apply_policy_to("registry-maintainability", component_id) :- + is_component(component_id, "pkg:pypi/aiohttp@3.9.3"). diff --git a/tests/integration/cases/pypi_aiohttp/policy_report.json b/tests/integration/cases/pypi_aiohttp/policy_report.json new file mode 100644 index 000000000..31044ae1e --- /dev/null +++ b/tests/integration/cases/pypi_aiohttp/policy_report.json @@ -0,0 +1,16 @@ +{ + "passed_policies": [], + "component_satisfies_policy": [], + "failed_policies": [ + [ + "registry-maintainability" + ] + ], + "component_violates_policy": [ + [ + "1", + "pkg:pypi/aiohttp@3.9.3", + "registry-maintainability" + ] + ] +} diff --git a/tests/integration/cases/pypi_aiohttp/test.yaml b/tests/integration/cases/pypi_aiohttp/test.yaml new file mode 100644 index 000000000..3a8e7a164 --- /dev/null +++ b/tests/integration/cases/pypi_aiohttp/test.yaml @@ -0,0 +1,28 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Analyzing a yanked PyPI package to verify that mcn_registry_maintainability_1 fails + when a package version has been explicitly yanked from the registry. 
+ +tags: +- macaron-python-package + +steps: +- name: Run macaron analyze on yanked aiohttp version. + kind: analyze + options: + command_args: + - -purl + - pkg:pypi/aiohttp@3.9.3 +- name: Run macaron verify-policy to check maintainability fails. + kind: verify + options: + policy: policy.dl + expect_fail: true +- name: Compare verify policy result. + kind: compare + options: + kind: policy_report + result: output/policy_report.json + expected: policy_report.json diff --git a/tests/integration/cases/pypi_arrow/policy.dl b/tests/integration/cases/pypi_arrow/policy.dl index 836cf5dfb..93abaa137 100644 --- a/tests/integration/cases/pypi_arrow/policy.dl +++ b/tests/integration/cases/pypi_arrow/policy.dl @@ -1,4 +1,4 @@ -/* Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. */ +/* Copyright (c) 2025 - 2026, Oracle and/or its affiliates. All rights reserved. */ /* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ #include "prelude.dl" @@ -10,3 +10,9 @@ Policy("has-hosted-build", component_id, "Require a hosted build and publishing apply_policy_to("has-hosted-build", component_id) :- is_component(component_id, purl), match("pkg:pypi/arrow.*", purl). + +Policy("registry-maintainability", component_id, "Require package to be actively maintained.") :- + check_passed(component_id, "mcn_registry_maintainability_1"). + +apply_policy_to("registry-maintainability", component_id) :- + is_component(component_id, "pkg:pypi/arrow@0.15.0"). 
diff --git a/tests/integration/cases/pypi_arrow/policy_report.json b/tests/integration/cases/pypi_arrow/policy_report.json index a53b03cfe..6abd722fe 100644 --- a/tests/integration/cases/pypi_arrow/policy_report.json +++ b/tests/integration/cases/pypi_arrow/policy_report.json @@ -10,6 +10,9 @@ "failed_policies": [ [ "has-hosted-build" + ], + [ + "registry-maintainability" ] ], "component_violates_policy": [ @@ -17,6 +20,11 @@ "1", "pkg:pypi/arrow@0.15.0", "has-hosted-build" + ], + [ + "1", + "pkg:pypi/arrow@0.15.0", + "registry-maintainability" ] ] } diff --git a/tests/integration/cases/pypi_boto/policy.dl b/tests/integration/cases/pypi_boto/policy.dl new file mode 100644 index 000000000..8e57f1f0c --- /dev/null +++ b/tests/integration/cases/pypi_boto/policy.dl @@ -0,0 +1,10 @@ +/* Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. */ +/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */ + +#include "prelude.dl" + +Policy("registry-maintainability", component_id, "Require package to be actively maintained.") :- + check_passed(component_id, "mcn_registry_maintainability_1"). + +apply_policy_to("registry-maintainability", component_id) :- + is_component(component_id, "pkg:pypi/boto@2.49.0"). 
diff --git a/tests/integration/cases/pypi_boto/policy_report.json b/tests/integration/cases/pypi_boto/policy_report.json new file mode 100644 index 000000000..4adbd1f1a --- /dev/null +++ b/tests/integration/cases/pypi_boto/policy_report.json @@ -0,0 +1,16 @@ +{ + "passed_policies": [], + "component_satisfies_policy": [], + "failed_policies": [ + [ + "registry-maintainability" + ] + ], + "component_violates_policy": [ + [ + "1", + "pkg:pypi/boto@2.49.0", + "registry-maintainability" + ] + ] +} diff --git a/tests/integration/cases/pypi_boto/test.yaml b/tests/integration/cases/pypi_boto/test.yaml new file mode 100644 index 000000000..6f584c0d5 --- /dev/null +++ b/tests/integration/cases/pypi_boto/test.yaml @@ -0,0 +1,28 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Analyzing a stale PyPI package to verify that mcn_registry_maintainability_1 fails + when a package has not had a release within the inactivity threshold (365 days). + +tags: +- macaron-python-package + +steps: +- name: Run macaron analyze on stale boto release. + kind: analyze + options: + command_args: + - -purl + - pkg:pypi/boto@2.49.0 +- name: Run macaron verify-policy to check maintainability fails. + kind: verify + options: + policy: policy.dl + expect_fail: true +- name: Compare verify policy result. + kind: compare + options: + kind: policy_report + result: output/policy_report.json + expected: policy_report.json diff --git a/tests/slsa_analyzer/checks/test_registry_maintainability_check.py b/tests/slsa_analyzer/checks/test_registry_maintainability_check.py new file mode 100644 index 000000000..fa08f750f --- /dev/null +++ b/tests/slsa_analyzer/checks/test_registry_maintainability_check.py @@ -0,0 +1,338 @@ +# Copyright (c) 2026 - 2026, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""Unit tests exercising ``RegistryMaintainabilityCheck`` with mocked registry and GitHub data."""

import os
from datetime import datetime, timedelta, timezone
from pathlib import Path
from unittest.mock import MagicMock, patch

from macaron.config.defaults import load_defaults
from macaron.errors import InvalidHTTPResponseError
from macaron.slsa_analyzer.checks.check_result import CheckResultType
from macaron.slsa_analyzer.checks.registry_maintainability_check import RegistryMaintainabilityCheck
from macaron.slsa_analyzer.git_service.base_git_service import NoneGitService
from macaron.slsa_analyzer.git_service.github import GitHub
from macaron.slsa_analyzer.package_registry.npm_registry import NPMRegistry
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIRegistry
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
from tests.conftest import MockAnalyzeContext

_PYPI_PURL = "pkg:pypi/requests@2.28.0"
_NPM_PURL = "pkg:npm/express@4.18.2"
_NO_VERSION_PURL = "pkg:pypi/requests"

# Dotted paths of the objects the tests replace with mocks.
_FIND_PUBLISH_TIMESTAMP = (
    "macaron.slsa_analyzer.package_registry.package_registry.PackageRegistry.find_publish_timestamp"
)
_CHECK_DEPRECATED = "macaron.slsa_analyzer.checks.registry_maintainability_check._check_deprecated"
_LATEST_RELEASE_TIMESTAMP = (
    "macaron.slsa_analyzer.checks.registry_maintainability_check._get_latest_release_timestamp"
)
_GITHUB_API_CLIENT = "macaron.slsa_analyzer.git_service.github.GitHub.api_client"

# Timestamp layout used for the mocked ``pushed_at`` repository field.
_GITHUB_TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def _days_ago(days: int) -> datetime:
    """Return the timezone-aware UTC datetime lying ``days`` days before now."""
    return datetime.now(timezone.utc) - timedelta(days=days)


def _make_github_service() -> GitHub:
    """Create a ``GitHub`` git service and load its defaults before returning it."""
    github = GitHub()
    github.load_defaults()
    return github


def _load_registry_config(tmp_path: Path, threshold_days: int = 365) -> None:
    """Write a ``[registry_maintainability]`` ini file under ``tmp_path`` and load it with ``load_defaults``.

    Parameters
    ----------
    tmp_path : Path
        Directory in which ``registry_config.ini`` is created.
    threshold_days : int
        Value written for ``inactivity_threshold_days``.
    """
    ini_text = f"""
[registry_maintainability]
inactivity_threshold_days = {threshold_days}
"""
    ini_path = os.path.join(tmp_path, "registry_config.ini")
    with open(ini_path, "w", encoding="utf-8") as ini_file:
        ini_file.write(ini_text)
    load_defaults(ini_path)


def _make_pypi_registry_info() -> PackageRegistryInfo:
    """Return a ``PackageRegistryInfo`` for the ``pypi`` ecosystem backed by a ``PyPIRegistry`` with defaults loaded."""
    pypi = PyPIRegistry()
    pypi.load_defaults()
    return PackageRegistryInfo(ecosystem="pypi", package_registry=pypi)


def _mock_pypi_ctx(macaron_path: Path, purl: str = _PYPI_PURL) -> MockAnalyzeContext:
    """Build a ``MockAnalyzeContext`` for ``purl`` with one PyPI registry and a ``NoneGitService``."""
    context = MockAnalyzeContext(macaron_path=macaron_path, output_dir="", purl=purl)
    context.dynamic_data["package_registries"] = [_make_pypi_registry_info()]
    context.dynamic_data["git_service"] = NoneGitService()
    return context

# Tests


def test_unknown_no_version(macaron_path: Path, tmp_path: Path) -> None:
    """A PURL without a version yields UNKNOWN."""
    _load_registry_config(tmp_path)
    check = RegistryMaintainabilityCheck()
    context = _mock_pypi_ctx(macaron_path, purl=_NO_VERSION_PURL)
    outcome = check.run_check(context)
    assert outcome.result_type == CheckResultType.UNKNOWN


def test_unknown_no_registries(macaron_path: Path, tmp_path: Path) -> None:
    """An empty ``package_registries`` list yields UNKNOWN."""
    _load_registry_config(tmp_path)
    check = RegistryMaintainabilityCheck()
    context = _mock_pypi_ctx(macaron_path)
    context.dynamic_data["package_registries"] = []
    outcome = check.run_check(context)
    assert outcome.result_type == CheckResultType.UNKNOWN


@patch(_FIND_PUBLISH_TIMESTAMP)
def test_unknown_api_error(
    mock_timestamp: MagicMock, macaron_path: Path, tmp_path: Path
) -> None:
    """An ``InvalidHTTPResponseError`` from ``find_publish_timestamp`` yields UNKNOWN."""
    _load_registry_config(tmp_path)
    mock_timestamp.side_effect = InvalidHTTPResponseError("API unavailable")
    check = RegistryMaintainabilityCheck()
    context = _mock_pypi_ctx(macaron_path)
    outcome = check.run_check(context)
    assert outcome.result_type == CheckResultType.UNKNOWN


@patch(_FIND_PUBLISH_TIMESTAMP)
@patch(_CHECK_DEPRECATED)
@patch(_LATEST_RELEASE_TIMESTAMP)
def test_pass_recent_release(
    mock_latest: MagicMock,
    mock_deprecated: MagicMock,
    mock_timestamp: MagicMock,
    macaron_path: Path,
    tmp_path: Path,
) -> None:
    """A release published 30 days ago passes with a 365-day threshold."""
    _load_registry_config(tmp_path, threshold_days=365)
    mock_timestamp.return_value = _days_ago(30)
    mock_deprecated.return_value = (False, None)
    mock_latest.return_value = None

    check = RegistryMaintainabilityCheck()
    context = _mock_pypi_ctx(macaron_path)
    outcome = check.run_check(context)
    assert outcome.result_type == CheckResultType.PASSED


@patch(_FIND_PUBLISH_TIMESTAMP)
@patch(_CHECK_DEPRECATED)
@patch(_LATEST_RELEASE_TIMESTAMP)
def test_fail_stale_release(
    mock_latest: MagicMock,
    mock_deprecated: MagicMock,
    mock_timestamp: MagicMock,
    macaron_path: Path,
    tmp_path: Path,
) -> None:
    """A release published 500 days ago fails with a 365-day threshold."""
    _load_registry_config(tmp_path, threshold_days=365)
    mock_timestamp.return_value = _days_ago(500)
    mock_deprecated.return_value = (False, None)
    mock_latest.return_value = None

    check = RegistryMaintainabilityCheck()
    context = _mock_pypi_ctx(macaron_path)
    outcome = check.run_check(context)
    assert outcome.result_type == CheckResultType.FAILED


@patch(_FIND_PUBLISH_TIMESTAMP)
@patch(_CHECK_DEPRECATED)
@patch(_LATEST_RELEASE_TIMESTAMP)
def test_fail_yanked_pypi(
    mock_latest: MagicMock,
    mock_deprecated: MagicMock,
    mock_timestamp: MagicMock,
    macaron_path: Path,
    tmp_path: Path,
) -> None:
    """A PyPI release reported as yanked fails even though it was published 10 days ago."""
    _load_registry_config(tmp_path)
    mock_timestamp.return_value = _days_ago(10)
    mock_deprecated.return_value = (True, "Security vulnerability discovered.")
    mock_latest.return_value = None

    check = RegistryMaintainabilityCheck()
    context = _mock_pypi_ctx(macaron_path)
    outcome = check.run_check(context)
    assert outcome.result_type == CheckResultType.FAILED


@patch(_FIND_PUBLISH_TIMESTAMP)
@patch(_CHECK_DEPRECATED)
@patch(_LATEST_RELEASE_TIMESTAMP)
def test_fail_deprecated_npm(
    mock_latest: MagicMock,
    mock_deprecated: MagicMock,
    mock_timestamp: MagicMock,
    macaron_path: Path,
    tmp_path: Path,
) -> None:
    """An npm package version reported as deprecated fails even though it was published 10 days ago."""
    _load_registry_config(tmp_path)
    mock_timestamp.return_value = _days_ago(10)
    mock_deprecated.return_value = (True, "Use express@5 instead.")
    mock_latest.return_value = None

    check = RegistryMaintainabilityCheck()
    npm = NPMRegistry()
    npm.load_defaults()
    npm_info = PackageRegistryInfo(ecosystem="npm", package_registry=npm)

    context = MockAnalyzeContext(macaron_path=macaron_path, output_dir="", purl=_NPM_PURL)
    context.dynamic_data["package_registries"] = [npm_info]
    context.dynamic_data["git_service"] = NoneGitService()
    outcome = check.run_check(context)
    assert outcome.result_type == CheckResultType.FAILED


@patch(_FIND_PUBLISH_TIMESTAMP)
@patch(_CHECK_DEPRECATED)
@patch(_LATEST_RELEASE_TIMESTAMP)
@patch(_GITHUB_API_CLIENT)
def test_fail_archived_repo(
    mock_api_client: MagicMock,
    mock_latest: MagicMock,
    mock_deprecated: MagicMock,
    mock_timestamp: MagicMock,
    macaron_path: Path,
    tmp_path: Path,
) -> None:
    """An archived GitHub repository fails the check despite a 10-day-old release and push."""
    _load_registry_config(tmp_path)
    mock_timestamp.return_value = _days_ago(10)
    mock_deprecated.return_value = (False, None)
    mock_latest.return_value = None
    pushed_at = _days_ago(10).strftime(_GITHUB_TIME_FORMAT)
    mock_api_client.get_repo_data.return_value = {
        "archived": True,
        "pushed_at": pushed_at,
    }

    check = RegistryMaintainabilityCheck()
    context = _mock_pypi_ctx(macaron_path)
    context.dynamic_data["git_service"] = _make_github_service()
    outcome = check.run_check(context)
    assert outcome.result_type == CheckResultType.FAILED


@patch(_FIND_PUBLISH_TIMESTAMP)
@patch(_CHECK_DEPRECATED)
@patch(_LATEST_RELEASE_TIMESTAMP)
@patch(_GITHUB_API_CLIENT)
def test_fail_stale_commit(
    mock_api_client: MagicMock,
    mock_latest: MagicMock,
    mock_deprecated: MagicMock,
    mock_timestamp: MagicMock,
    macaron_path: Path,
    tmp_path: Path,
) -> None:
    """A push made 500 days ago fails a 365-day threshold even with a 30-day-old release."""
    _load_registry_config(tmp_path, threshold_days=365)
    published_at = _days_ago(30)
    last_push = _days_ago(500)
    mock_timestamp.return_value = published_at
    mock_deprecated.return_value = (False, None)
    mock_latest.return_value = None
    mock_api_client.get_repo_data.return_value = {
        "archived": False,
        "pushed_at": last_push.strftime(_GITHUB_TIME_FORMAT),
    }

    check = RegistryMaintainabilityCheck()
    context = _mock_pypi_ctx(macaron_path)
    context.dynamic_data["git_service"] = _make_github_service()
    outcome = check.run_check(context)
    assert outcome.result_type == CheckResultType.FAILED


@patch(_FIND_PUBLISH_TIMESTAMP)
@patch(_CHECK_DEPRECATED)
@patch(_LATEST_RELEASE_TIMESTAMP)
def test_custom_threshold(
    mock_latest: MagicMock,
    mock_deprecated: MagicMock,
    mock_timestamp: MagicMock,
    macaron_path: Path,
    tmp_path: Path,
) -> None:
    """A threshold of 60 days loaded from config makes a 90-day-old release fail."""
    _load_registry_config(tmp_path, threshold_days=60)
    # 90 days exceeds the 60-day threshold.
    mock_timestamp.return_value = _days_ago(90)
    mock_deprecated.return_value = (False, None)
    mock_latest.return_value = None  # fall back to find_publish_timestamp value

    check = RegistryMaintainabilityCheck()
    context = _mock_pypi_ctx(macaron_path)
    outcome = check.run_check(context)
    assert outcome.result_type == CheckResultType.FAILED


@patch(_FIND_PUBLISH_TIMESTAMP)
@patch(_CHECK_DEPRECATED)
@patch(_LATEST_RELEASE_TIMESTAMP)
def test_boundary_at_threshold(
    mock_latest: MagicMock,
    mock_deprecated: MagicMock,
    mock_timestamp: MagicMock,
    macaron_path: Path,
    tmp_path: Path,
) -> None:
    """A release exactly 365 days old passes a 365-day threshold (the threshold is exclusive)."""
    _load_registry_config(tmp_path, threshold_days=365)
    mock_timestamp.return_value = _days_ago(365)
    mock_deprecated.return_value = (False, None)
    mock_latest.return_value = None

    check = RegistryMaintainabilityCheck()
    context = _mock_pypi_ctx(macaron_path)
    outcome = check.run_check(context)
    assert outcome.result_type == CheckResultType.PASSED


@patch(_FIND_PUBLISH_TIMESTAMP)
@patch(_CHECK_DEPRECATED)
@patch(_LATEST_RELEASE_TIMESTAMP)
@patch(_GITHUB_API_CLIENT)
def test_skip_github_for_non_github(
    mock_api_client: MagicMock,
    mock_latest: MagicMock,
    mock_deprecated: MagicMock,
    mock_timestamp: MagicMock,
    macaron_path: Path,
    tmp_path: Path,
) -> None:
    """With a non-GitHub git service, ``get_repo_data`` is never called and the check still passes."""
    _load_registry_config(tmp_path)
    mock_timestamp.return_value = _days_ago(30)
    mock_deprecated.return_value = (False, None)
    mock_latest.return_value = None

    check = RegistryMaintainabilityCheck()
    context = _mock_pypi_ctx(macaron_path)
    # git_service is NoneGitService (not GitHub) — API must not be called.
    context.dynamic_data["git_service"] = NoneGitService()
    outcome = check.run_check(context)

    mock_api_client.get_repo_data.assert_not_called()
    assert outcome.result_type == CheckResultType.PASSED