Skip to content

Commit 53bda8d

Browse files
authored
chore: update get maintainers to use new JSON API field (#1311)
Signed-off-by: Carl Flottmann <carl.flottmann@oracle.com>
1 parent d044b19 commit 53bda8d

4 files changed

Lines changed: 33 additions & 38 deletions

File tree

src/macaron/malware_analyzer/pypi_heuristics/metadata/closer_release_join_date.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""Analyzer checks whether the maintainers' join date closer to latest package's release date."""
@@ -10,7 +10,7 @@
1010
from macaron.malware_analyzer.datetime_parser import parse_datetime
1111
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
1212
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
13-
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry
13+
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset
1414

1515

1616
class CloserReleaseJoinDateAnalyzer(BaseHeuristicAnalyzer):
@@ -33,30 +33,28 @@ def _load_defaults(self) -> int:
3333
return section.getint("timedelta_threshold_of_join_release", 5)
3434
return 5
3535

36-
def _get_maintainers_join_date(self, pypi_registry: PyPIRegistry, package_name: str) -> list[datetime] | None:
36+
def _get_maintainers_join_date(self, pypi_package_json: PyPIPackageJsonAsset) -> list[datetime] | None:
3737
"""Get the join date of the maintainers.
3838
3939
Each package might have multiple maintainers.
4040
4141
Parameters
4242
----------
43-
pypi_registry: PyPIRegistry
44-
The PyPI registry implementation.
45-
package_name: str
46-
The package name.
43+
pypi_package_json: PyPIPackageJsonAsset
44+
The PyPI package JSON asset object.
4745
4846
Returns
4947
-------
5048
list[datetime] | None
5149
The maintainers' join date.
5250
"""
53-
maintainers: list | None = pypi_registry.get_maintainers_of_package(package_name)
51+
maintainers: list | None = pypi_package_json.get_maintainers_of_package()
5452
if maintainers is None:
5553
return None
5654

5755
join_dates: list[datetime] = []
5856
for maintainer in maintainers:
59-
maintainer_join_date = pypi_registry.get_maintainer_join_date(maintainer)
57+
maintainer_join_date = pypi_package_json.pypi_registry.get_maintainer_join_date(maintainer)
6058
if maintainer_join_date is not None:
6159
join_dates.append(maintainer_join_date)
6260

@@ -94,9 +92,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
9492
tuple[HeuristicResult, dict[str, JsonType]]:
9593
The result and related information collected during the analysis.
9694
"""
97-
maintainers_join_date: list[datetime] | None = self._get_maintainers_join_date(
98-
pypi_package_json.pypi_registry, pypi_package_json.component_name
99-
)
95+
maintainers_join_date: list[datetime] | None = self._get_maintainers_join_date(pypi_package_json)
10096
latest_release_date: datetime | None = self._get_latest_release_date(pypi_package_json)
10197
detail_info: dict[str, JsonType] = {
10298
"maintainers_join_date": (

src/macaron/malware_analyzer/pypi_heuristics/metadata/similar_projects.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This analyzer checks if the package has a similar structure to other packages maintained by the same user."""
@@ -50,7 +50,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
5050
similar_projects: list[str] = []
5151
result: HeuristicResult = HeuristicResult.PASS
5252

53-
maintainers = pypi_package_json.pypi_registry.get_maintainers_of_package(pypi_package_json.component_name)
53+
maintainers = pypi_package_json.get_maintainers_of_package()
5454
if not maintainers:
5555
# NOTE: This would ideally raise an error, identifying malformed package information, but issues with
5656
# obtaining maintainer information from the HTML page mean this will remain a SKIP for now.

src/macaron/slsa_analyzer/package_registry/pypi_registry.py

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -397,26 +397,6 @@ def get_package_page(self, package_name: str) -> str | None:
397397
return html_snippets
398398
return None
399399

400-
def get_maintainers_of_package(self, package_name: str) -> list | None:
401-
"""Implement custom API to get all maintainers of the package.
402-
403-
Parameters
404-
----------
405-
package_name: str
406-
The package name.
407-
408-
Returns
409-
-------
410-
list | None
411-
The list of maintainers.
412-
"""
413-
package_page: str | None = self.get_package_page(package_name)
414-
if package_page is None:
415-
return None
416-
soup = BeautifulSoup(package_page, "html.parser")
417-
maintainers = soup.find_all("span", class_="sidebar-section__user-gravatar-text")
418-
return list({maintainer.get_text(strip=True) for maintainer in maintainers})
419-
420400
def get_maintainer_profile_page(self, username: str) -> str | None:
421401
"""Implement custom API to get maintainer's profile page.
422402
@@ -772,6 +752,25 @@ def get_releases(self) -> dict | None:
772752
"""
773753
return json_extract(self.package_json, ["releases"], dict)
774754

755+
def get_maintainers_of_package(self) -> list | None:
    """Return the names of all maintainers of this package.

    The names are taken from the ``user`` value of each entry found under
    ``ownership.roles`` in the package JSON.

    Returns
    -------
    list | None
        The maintainer names in order of first appearance, without duplicates,
        or ``None`` when ``json_extract`` returns ``None`` for ``ownership.roles``.
    """
    maintainer_roles = json_extract(self.package_json, ["ownership", "roles"], list)
    if maintainer_roles is None:
        return None

    maintainers: list[str] = []
    for maintainer_with_role in maintainer_roles:
        # Only the outer value is validated as a list; skip entries that are not
        # mappings instead of raising AttributeError on ``.get``.
        if not isinstance(maintainer_with_role, dict):
            continue

        maintainer = maintainer_with_role.get("user")
        # Report each user once, even if listed under several roles, so callers do
        # not repeat per-maintainer lookups.
        if maintainer is not None and maintainer not in maintainers:
            maintainers.append(maintainer)

    return maintainers
773+
775774
def get_project_links(self) -> dict | None:
776775
"""Retrieve the project links from the base metadata.
777776

tests/malware_analyzer/pypi/test_closer_release_join_date.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
1+
# Copyright (c) 2024 - 2026, Oracle and/or its affiliates. All rights reserved.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""Tests for closer release join date heuristic."""
@@ -14,7 +14,7 @@ def test_analyze_pass(pypi_package_json: MagicMock) -> None:
1414
analyzer = CloserReleaseJoinDateAnalyzer()
1515

1616
# Set up mock return values.
17-
pypi_package_json.pypi_registry.get_maintainers_of_package.return_value = ["maintainer1", "maintainer2"]
17+
pypi_package_json.get_maintainers_of_package.return_value = ["maintainer1", "maintainer2"]
1818
pypi_package_json.pypi_registry.get_maintainer_join_date.side_effect = [datetime(2018, 1, 1), datetime(2019, 1, 1)]
1919
pypi_package_json.get_latest_release_upload_time.return_value = "2022-06-20T12:00:00"
2020
pypi_package_json.component_name = "mock1"
@@ -33,7 +33,7 @@ def test_analyze_process(pypi_package_json: MagicMock) -> None:
3333
analyzer = CloserReleaseJoinDateAnalyzer()
3434

3535
# Set up mock return values.
36-
pypi_package_json.pypi_registry.get_maintainers_of_package.return_value = ["maintainer1"]
36+
pypi_package_json.get_maintainers_of_package.return_value = ["maintainer1"]
3737
pypi_package_json.pypi_registry.get_maintainer_join_date.side_effect = [datetime(2022, 6, 18)]
3838
pypi_package_json.get_latest_release_upload_time.return_value = "2022-06-20T12:00:00"
3939
pypi_package_json.component_name = "mock1"
@@ -52,7 +52,7 @@ def test_analyze_skip(pypi_package_json: MagicMock) -> None:
5252
analyzer = CloserReleaseJoinDateAnalyzer()
5353

5454
# Set up mock return values.
55-
pypi_package_json.pypi_registry.get_maintainers_of_package.return_value = None
55+
pypi_package_json.get_maintainers_of_package.return_value = None
5656
pypi_package_json.get_latest_release_upload_time.return_value = "2022-06-20T12:00:00"
5757
pypi_package_json.component_name = "mock1"
5858

0 commit comments

Comments
 (0)