From 595b3e7bd567f534357b6901d659842376c80e07 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Wed, 11 Mar 2026 12:08:53 +0100 Subject: [PATCH 01/16] Support Github revision with backend generic provider --- bot/code_review_bot/__init__.py | 44 ++++----- bot/code_review_bot/backend.py | 57 +++++------- bot/code_review_bot/cli.py | 15 +++- bot/code_review_bot/config.py | 10 +++ bot/code_review_bot/revisions/__init__.py | 3 +- bot/code_review_bot/revisions/base.py | 39 ++++++-- bot/code_review_bot/revisions/github.py | 93 ++++++++++++++++++++ bot/code_review_bot/revisions/phabricator.py | 41 ++++++++- bot/code_review_bot/workflow.py | 36 ++++++-- 9 files changed, 259 insertions(+), 79 deletions(-) create mode 100644 bot/code_review_bot/revisions/github.py diff --git a/bot/code_review_bot/__init__.py b/bot/code_review_bot/__init__.py index 1bfe0c0f8..2c8d2b79c 100644 --- a/bot/code_review_bot/__init__.py +++ b/bot/code_review_bot/__init__.py @@ -191,41 +191,29 @@ def hash(self): We make the assumption that the message does not contain the line number If an error occurs reading the file content (locally or remotely), None is returned """ + from code_review_bot.revisions import GithubRevision, PhabricatorRevision + assert self.revision is not None, "Missing revision" + local_repository = None + if isinstance(self.revision, PhabricatorRevision): + if settings.mercurial_cache_checkout: + local_repository = settings.mercurial_cache_checkout + elif isinstance(self.revision, GithubRevision): + assert ( + settings.github_cache + ), "Github cache repository is mandatory to analyse a github revision" + local_repository = settings.github_cache + else: + raise Exception(self.revision.__class__) + raise NotImplementedError + # Build the hash only if the file is not autogenerated. # An autogenerated file resides in the build directory that it has the # format `obj-x86_64-pc-linux-gnu` file_content = None if "/obj-" not in self.path: - if settings.mercurial_cache_checkout: - logger.debug("Using the local repository to build issue's hash") - try: - with (settings.mercurial_cache_checkout / self.path).open() as f: - file_content = f.read() - except (FileNotFoundError, IsADirectoryError): - logger.warning( - "Failed to find issue's related file", path=self.path - ) - file_content = None - else: - try: - # Load all the lines affected by the issue - file_content = self.revision.load_file(self.path) - except ValueError: - # Build the hash with an empty content in case the path is erroneous - file_content = None - except requests.exceptions.HTTPError as e: - if e.response.status_code == 404: - logger.warning( - "Failed to download a file with an issue", path=self.path - ) - - # We still build the hash with empty content - file_content = None - else: - # When encountering another HTTP error, raise the issue - raise + file_content = self.revision.get_file_content(self.path, local_repository) if file_content is None: self._hash = None diff --git a/bot/code_review_bot/backend.py b/bot/code_review_bot/backend.py index 17019b5b3..7a81c6c9a 100644 --- a/bot/code_review_bot/backend.py +++ b/bot/code_review_bot/backend.py @@ -9,6 +9,7 @@ from code_review_bot import taskcluster from code_review_bot.config import GetAppUserAgent, settings +from code_review_bot.revisions import PhabricatorRevision from code_review_bot.tasks.lint import MozLintIssue logger = structlog.get_logger(__name__) @@ -46,38 +47,31 @@ def publish_revision(self, revision): logger.warn("Skipping revision publication on backend") return - # Check the repositories are urls - for url in (revision.base_repository, revision.head_repository): - assert isinstance(url, str), "Repository must be a string" - res = urllib.parse.urlparse(url) - assert res.scheme and res.netloc, f"Repository {url} is not an url" - - # Check the Mercurial changesets are strings - for changeset in ( - revision.base_changeset, - revision.head_changeset, - ): - assert isinstance(changeset, str), "Mercurial changeset must be a string" - - # Create revision on backend if it does not exists - data = { - "phabricator_id": revision.phabricator_id, - "phabricator_phid": revision.phabricator_phid, - "title": revision.title, - "bugzilla_id": revision.bugzilla_id, - "base_repository": revision.base_repository, - "head_repository": revision.head_repository, - "base_changeset": revision.base_changeset, - "head_changeset": revision.head_changeset, - } - - # Try to create the revision, or retrieve it in case it exists with that Phabricator ID. + elif isinstance(revision, PhabricatorRevision): + # Check the repositories are urls + for url in (revision.base_repository, revision.head_repository): + assert isinstance(url, str), "Repository must be a string" + res = urllib.parse.urlparse(url) + assert res.scheme and res.netloc, f"Repository {url} is not an url" + + # Check the Mercurial changesets are strings + for changeset in ( + revision.base_changeset, + revision.head_changeset, + ): + assert isinstance( + changeset, str + ), "Mercurial changeset must be a string" + + revision_data, diff_data = revision.serialize() + + # Try to create the revision, or retrieve it in case it exists with that provider and ID. # The backend always returns a revisions, either a new one, or a pre-existing one revision_url = "/v1/revision/" auth = (self.username, self.password) url_post = urllib.parse.urljoin(self.url, revision_url) response = requests.post( - url_post, headers=GetAppUserAgent(), json=data, auth=auth + url_post, headers=GetAppUserAgent(), json=revision_data, auth=auth ) if not response.ok: logger.warn(f"Backend rejected the payload: {response.content}") @@ -87,17 +81,14 @@ def publish_revision(self, revision): revision.issues_url = backend_revision["issues_bulk_url"] revision.id = backend_revision["id"] - # A revision may have no diff (e.g. Mozilla-central group tasks) - if not revision.diff_id: + # A revision may have no diff (e.g. Phabricator Mozilla-central group tasks) + if isinstance(revision, PhabricatorRevision) and not revision.diff_id: return backend_revision # Create diff attached to revision on backend data = { - "id": revision.diff_id, - "phid": revision.diff_phid, + **diff_data, "review_task_id": settings.taskcluster.task_id, - "mercurial_hash": revision.head_changeset, - "repository": revision.head_repository, } backend_diff = self.create(backend_revision["diffs_url"], data) diff --git a/bot/code_review_bot/cli.py b/bot/code_review_bot/cli.py index 6e2a9a875..8f590f6a0 100644 --- a/bot/code_review_bot/cli.py +++ b/bot/code_review_bot/cli.py @@ -26,7 +26,7 @@ ) from code_review_bot.config import settings from code_review_bot.report import get_reporters -from code_review_bot.revisions import PhabricatorRevision, Revision +from code_review_bot.revisions import GithubRevision, PhabricatorRevision, Revision from code_review_bot.tools.libmozdata import setup as setup_libmozdata from code_review_bot.tools.log import init_logger from code_review_bot.workflow import Workflow @@ -64,6 +64,13 @@ def parse_cli(): type=Path, default=None, ) + parser.add_argument( + "--github-repository", + help="Optional path to a up-to-date github repository matching the analyzed revision.\n" + "This argument is required for Github reviusions in order to compute issues' hashes based on file content.", + type=Path, + default=None, + ) parser.add_argument("--taskcluster-client-id", help="Taskcluster Client ID") parser.add_argument("--taskcluster-access-token", help="Taskcluster Access token") return parser.parse_args() @@ -116,6 +123,7 @@ def main(): taskcluster.secrets["repositories"], taskcluster.secrets["ssh_key"], args.mercurial_repository, + args.github_repository, ) # Setup statistics @@ -205,6 +213,11 @@ def main(): ) return 1 + if isinstance(revision, GithubRevision): + assert ( + args.github_repository is not None + ), "Girhub revision analysis requires the --github-repository argument to be set" + # Run workflow according to source w = Workflow( reporters, diff --git a/bot/code_review_bot/config.py b/bot/code_review_bot/config.py index c571aa3ef..dfa54b2cb 100644 --- a/bot/code_review_bot/config.py +++ b/bot/code_review_bot/config.py @@ -58,6 +58,7 @@ def __init__(self): # Cache to store whole repositories self.mercurial_cache = None + self.github_cache = None # SSH Key used to push on try self.ssh_key = None @@ -78,6 +79,7 @@ def setup( repositories, ssh_key=None, mercurial_cache=None, + github_cache=None, ): # Detect source from env if "TRY_TASK_ID" in os.environ and "TRY_TASK_GROUP_ID" in os.environ: @@ -148,6 +150,14 @@ def build_conf(nb, repo): # Save ssh key when mercurial cache is enabled self.ssh_key = ssh_key + # Store github cache path + if github_cache is not None: + self.github_cache = Path(github_cache) + assert ( + self.github_cache.exists() + ), f"Github cache does not exist {self.github_cache}" + logger.info("Using Github cache", path=self.mercurial_cache) + def load_user_blacklist(self, usernames, phabricator_api): """ Load all black listed users from Phabricator API diff --git a/bot/code_review_bot/revisions/__init__.py b/bot/code_review_bot/revisions/__init__.py index 10d4224fa..f98bd00a7 100644 --- a/bot/code_review_bot/revisions/__init__.py +++ b/bot/code_review_bot/revisions/__init__.py @@ -3,6 +3,7 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. from code_review_bot.revisions.base import ImprovementPatch, Revision +from code_review_bot.revisions.github import GithubRevision from code_review_bot.revisions.phabricator import PhabricatorRevision -__all__ = [ImprovementPatch, Revision, PhabricatorRevision] +__all__ = [ImprovementPatch, Revision, PhabricatorRevision, GithubRevision] diff --git a/bot/code_review_bot/revisions/base.py b/bot/code_review_bot/revisions/base.py index 1cd319401..492b9a35a 100644 --- a/bot/code_review_bot/revisions/base.py +++ b/bot/code_review_bot/revisions/base.py @@ -6,6 +6,7 @@ import random from abc import ABC from datetime import timedelta +from pathlib import Path import rs_parsepatch import structlog @@ -165,6 +166,25 @@ def contains(self, issue): lines = set(range(issue.line, issue.line + issue.nb_lines)) return not lines.isdisjoint(modified_lines) + def get_file_content( + self, file_path: str, local_cache_repository: Path | None = None + ): + if local_cache_repository: + logger.debug("Using the local repository to build issue's hash") + try: + with (local_cache_repository / file_path).open() as f: + file_content = f.read() + except (FileNotFoundError, IsADirectoryError): + logger.warning("Failed to find issue's related file", path=file_path) + file_content = None + else: + try: + file_content = self.load_file(file_path) + except ValueError: + # The path is erroneous, consider as empty content + file_content = None + return file_content + @property def has_clang_files(self): """ @@ -237,17 +257,26 @@ def as_dict(self): """ raise NotImplementedError + def serialize(self): + """ + Outputs a tuple of dicts for revision and diff sent to backend + """ + raise NotImplementedError + @staticmethod def from_try_task(try_task: dict, decision_task: dict, phabricator: PhabricatorAPI): """ - Load identifiers from Phabricator, using the remote task description + Load identifiers from Phabricator or Github, using the remote task description """ + from code_review_bot.revisions.github import GithubRevision from code_review_bot.revisions.phabricator import PhabricatorRevision # Load build target phid from the task env code_review = try_task["extra"]["code-review"] - # TODO: support github revision here too - return PhabricatorRevision.from_try_task( - code_review, decision_task, phabricator - ) + if "github" in code_review: + return GithubRevision(**code_review["github"]) + else: + return PhabricatorRevision.from_try_task( + code_review, decision_task, phabricator + ) diff --git a/bot/code_review_bot/revisions/github.py b/bot/code_review_bot/revisions/github.py new file mode 100644 index 000000000..8e96fc3d7 --- /dev/null +++ b/bot/code_review_bot/revisions/github.py @@ -0,0 +1,93 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from urllib.parse import urlparse + +import requests +import structlog + +from code_review_bot.revisions import Revision + +logger = structlog.get_logger(__name__) + + +class GithubRevision(Revision): + """ + A revision from a github pull-request + """ + + def __init__(self, repo_url, branch, pull_number, pull_head_sha): + super().__init__() + + self.repo_url = repo_url + self.branch = branch + self.pull_number = pull_number + self.pull_head_sha = pull_head_sha + + # Load the patch from Github + self.patch = self.load_patch() + + def __str__(self): + return f"Github pull request {self.repo_url} #{self.pull_number} ({self.pull_head_sha[:8]})" + + def __repr__(self): + return f"GithubRevision repo_url={self.repo_url} branch={self.branch} pull_number={self.pull_number} sha={self.pull_head_sha}" + + @property + def repo_name(self): + """ + Extract the name of the repository from its URL + """ + return urlparse(self.repo_url).path.strip("/") + + @property + def repository_slug(self): + """ + Generate a slug from the Github repository. + This method copies the automatic slug creation in backend's RepositoryGetOrCreateField serializer field. + """ + base_repo_url = self.pull_request.base.repo.html_url + parsed = urlparse(base_repo_url) + return parsed.path.lstrip("/").replace("/", "-") + + def load_patch(self): + """ + Load the patch content for the current pull request HEAD + """ + # TODO: use specific sha + url = f"{self.repo_url}/pull/{self.pull_number}.diff" + logger.info("Loading github patch", url=url) + resp = requests.get(url, allow_redirects=True) + resp.raise_for_status() + return resp.content.decode() + + def as_dict(self): + return { + "repo_url": self.repo_url, + "branch": self.branch, + "pull_number": self.pull_number, + "pull_head_sha": self.pull_head_sha, + } + + def serialize(self): + """ + Outputs a tuple of dicts for revision and diff (empty for Github) sent to backend + """ + revision = { + "provider": "github", + "provider_id": self.pull_number, + # TODO: Use the pull request information from the API + "title": f"Issue {self.pull_number}", + "bugzilla_id": None, + # TODO: Use the pull request information from the API + "base_repository": self.repo_url, + "head_repository": self.repo_url, + } + diff = { + "provider": "github", + "provider_id": self.pull_head_sha, + "mercurial_hash": self.pull_head_sha, + "repository": self.repo_url, + } + return revision, diff diff --git a/bot/code_review_bot/revisions/phabricator.py b/bot/code_review_bot/revisions/phabricator.py index 8970e19ae..975268600 100644 --- a/bot/code_review_bot/revisions/phabricator.py +++ b/bot/code_review_bot/revisions/phabricator.py @@ -130,12 +130,13 @@ def __str__(self): return f"Phabricator #{self.diff_id} - {self.diff_phid}" @staticmethod - def from_try_task(try_task: dict, decision_task: dict, phabricator: PhabricatorAPI): + def from_try_task( + code_review: dict, decision_task: dict, phabricator: PhabricatorAPI + ): """ Load identifiers from Phabricator, using the remote task description """ # Load build target phid from the task env - code_review = try_task["extra"]["code-review"] build_target_phid = code_review.get("phabricator-diff") or code_review.get( "phabricator-build-target" ) @@ -361,8 +362,17 @@ def load_file(self, path): ) logger.info("Downloading HGMO file", url=url) - response = requests.get(url, headers=GetAppUserAgent()) - response.raise_for_status() + try: + response = requests.get(url, headers=GetAppUserAgent()) + response.raise_for_status() + except requests.exceptions.HTTPError as e: + if e.response.status_code == 404: + logger.warning("Failed to download file", path=self.path) + # Consider as empty content if the file is not found + return None + else: + # When encountering another HTTP error, raise the issue + raise e # Store in cache content = response.content.decode("utf-8") @@ -430,3 +440,26 @@ def as_dict(self): "head_changeset": self.head_changeset, "base_changeset": self.base_changeset, } + + def serialize(self): + """ + Outputs a tuple of dicts for revision and diff sent to backend + """ + revision = { + "provider": "phabricator", + "provider_id": self.phabricator_id, + "title": self.title, + "bugzilla_id": self.bugzilla_id, + "base_repository": self.base_repository, + "head_repository": self.head_repository, + "base_changeset": self.base_changeset, + "head_changeset": self.head_changeset, + } + diff = { + "id": self.diff_id, + "provider_id": self.diff_phid, + "mercurial_hash": self.head_changeset, + "repository": self.head_repository, + } + + return revision, diff diff --git a/bot/code_review_bot/workflow.py b/bot/code_review_bot/workflow.py index 260a638e3..d398abec6 100644 --- a/bot/code_review_bot/workflow.py +++ b/bot/code_review_bot/workflow.py @@ -20,7 +20,7 @@ from code_review_bot.config import settings from code_review_bot.mercurial import MercurialWorker, Repository, robust_checkout from code_review_bot.report.debug import DebugReporter -from code_review_bot.revisions import PhabricatorRevision, Revision +from code_review_bot.revisions import GithubRevision, PhabricatorRevision, Revision from code_review_bot.sources.phabricator import ( PhabricatorActions, PhabricatorBuildState, @@ -133,7 +133,7 @@ def run(self, revision): self.clone_repository(revision) # Mark know issues to avoid publishing them on this patch - self.find_previous_issues(issues, base_rev_changeset) + self.find_previous_issues(revision, issues, base_rev_changeset) new_issues_count = sum(issue.new_issue for issue in issues) logger.info( f"Found {new_issues_count} new issues (over {len(issues)} total detected issues)", @@ -265,9 +265,11 @@ def start_analysis(self, revision): logger.warning("Blacklisted author, stopping there.") return - # Cannot run without mercurial cache configured - if not settings.mercurial_cache: - raise Exception("Mercurial cache must be configured to start analysis") + # Cannot run without either mercurial or github cache configured + if not settings.mercurial_cache and not settings.github_cache: + raise Exception( + "One of Mercurial cache or github cache must be configured to start analysis" + ) # Cannot run without ssh key if not settings.ssh_key: @@ -361,6 +363,12 @@ def clone_repository(self, revision): Clone the repo locally when configured On production this should use a Taskcluster cache """ + if not isinstance(revision, PhabricatorRevision): + logger.info( + "Mercurial clone only supports Phabricator revisions, skipping." + ) + return + if not settings.mercurial_cache: logger.debug("Local clone not required") return @@ -490,7 +498,7 @@ def index(self, revision, **kwargs): }, ) - def find_previous_issues(self, issues, base_rev_changeset=None): + def find_previous_issues(self, revision, issues, base_rev_changeset=None): """ Look for known issues in the backend matching the given list of issues @@ -513,9 +521,17 @@ def find_previous_issues(self, issues, base_rev_changeset=None): base_revision_changeset=base_rev_changeset, ) + if isinstance(revision, PhabricatorRevision): + repository_slug = "mozilla-central" + elif isinstance(revision, GithubRevision): + # TODO: Rely on the central repository for known issues + repository_slug = revision.repository_slug + else: + raise NotImplementedError + for path, group_issues in issues_groups: known_issues = self.backend_api.list_repo_issues( - "mozilla-central", + repository_slug, date=current_date, revision_changeset=base_rev_changeset, path=path, @@ -678,6 +694,11 @@ def update_status(self, revision, state): "Only Phabricator revisions are supported for now" ) assert isinstance(state, BuildState) + + # Skip github status update, as we rely on the github reporter for publication + if isinstance(revision, GithubRevision): + return + if not revision.build_target_phid: logger.info( "No build target found, skipping HarborMaster update", state=state.value @@ -701,6 +722,7 @@ def publish_link(self, revision: Revision, slug: str, name: str, url: str): raise NotImplementedError( "Only Phabricator revisions are supported for now" ) + if not revision.build_target_phid: logger.info( "No build target found, skipping HarborMaster link creation", From 9de73ea5364bf31cdd66c6161d35c883f8d7221f Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Wed, 11 Mar 2026 14:51:53 +0100 Subject: [PATCH 02/16] Update tests --- bot/tests/conftest.py | 6 +- bot/tests/test_backend.py | 10 +-- bot/tests/test_index.py | 7 +-- bot/tests/test_reporter_phabricator.py | 87 +++++++++++--------------- 4 files changed, 47 insertions(+), 63 deletions(-) diff --git a/bot/tests/conftest.py b/bot/tests/conftest.py index 9d75614dc..caa4ae7fa 100644 --- a/bot/tests/conftest.py +++ b/bot/tests/conftest.py @@ -332,10 +332,12 @@ def mock_revision(mock_phabricator, mock_try_task, mock_decision_task, mock_conf """ Mock a mercurial revision """ - from code_review_bot.revisions import PhabricatorRevision + from code_review_bot.revisions import PhabricatorRevision, Revision with mock_phabricator as api: - return PhabricatorRevision.from_try_task(mock_try_task, mock_decision_task, api) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) + return revision @pytest.fixture diff --git a/bot/tests/test_backend.py b/bot/tests/test_backend.py index e6cc07bba..8d3d922cc 100644 --- a/bot/tests/test_backend.py +++ b/bot/tests/test_backend.py @@ -37,8 +37,8 @@ def test_publication(mock_clang_tidy_issues, mock_revision, mock_backend, mock_h assert revisions[1] == { "bugzilla_id": 1234567, "id": 1, - "phabricator_id": 51, - "phabricator_phid": "PHID-DREV-zzzzz", + "provider": "phabricator", + "provider_id": 51, "title": "Static Analysis tests", "diffs_url": "http://code-review-backend.test/v1/revision/1/diffs/", "issues_bulk_url": "http://code-review-backend.test/v1/revision/1/issues/", @@ -55,7 +55,7 @@ def test_publication(mock_clang_tidy_issues, mock_revision, mock_backend, mock_h "id": 42, "issues_url": "http://code-review-backend.test/v1/diff/42/issues/", "mercurial_hash": "deadbeef1234", - "phid": "PHID-DIFF-test", + "provider_id": "PHID-DIFF-test", "review_task_id": "local instance", "repository": "http://hgmo/test-try", } @@ -132,8 +132,8 @@ def test_missing_bugzilla_id(mock_revision, mock_backend, mock_hgmo): assert revisions[1] == { "id": 1, "bugzilla_id": None, - "phabricator_id": 51, - "phabricator_phid": "PHID-DREV-zzzzz", + "provider": "phabricator", + "provider_id": 51, "title": "Static Analysis tests", "diffs_url": "http://code-review-backend.test/v1/revision/1/diffs/", "issues_bulk_url": "http://code-review-backend.test/v1/revision/1/issues/", diff --git a/bot/tests/test_index.py b/bot/tests/test_index.py index 768bf06d8..fa5e6c032 100644 --- a/bot/tests/test_index.py +++ b/bot/tests/test_index.py @@ -5,7 +5,7 @@ from unittest import mock from code_review_bot.config import TaskCluster -from code_review_bot.revisions import PhabricatorRevision +from code_review_bot.revisions import PhabricatorRevision, Revision class MockPhabricatorRevision(PhabricatorRevision): @@ -199,9 +199,8 @@ def test_index_from_try( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) mock_workflow.index_service = mock.Mock() mock_config.taskcluster = TaskCluster("/tmp/dummy", "12345deadbeef", 0, False) diff --git a/bot/tests/test_reporter_phabricator.py b/bot/tests/test_reporter_phabricator.py index 9fa67ccea..cc9e88af6 100644 --- a/bot/tests/test_reporter_phabricator.py +++ b/bot/tests/test_reporter_phabricator.py @@ -13,7 +13,7 @@ from code_review_bot import Level from code_review_bot.report.phabricator import PhabricatorReporter -from code_review_bot.revisions import ImprovementPatch, PhabricatorRevision +from code_review_bot.revisions import ImprovementPatch, PhabricatorRevision, Revision from code_review_bot.tasks.clang_format import ClangFormatIssue, ClangFormatTask from code_review_bot.tasks.clang_tidy import ClangTidyIssue, ClangTidyTask from code_review_bot.tasks.clang_tidy_external import ( @@ -270,9 +270,8 @@ def test_phabricator_clang_tidy( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "another_test.cpp": [41, 42, 43] @@ -308,9 +307,8 @@ def test_phabricator_clang_format( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.cpp": [41, 42, 43], @@ -352,9 +350,8 @@ def test_phabricator_mozlint( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "python/test.py": [41, 42, 43], @@ -443,9 +440,8 @@ def test_phabricator_coverage( Test Phabricator reporter publication on a mock coverage issue """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.txt": [0], @@ -511,9 +507,8 @@ def raise_404(*args, **kwargs): raise HTTPError(response=resp_mock) with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.txt": [0], @@ -547,9 +542,8 @@ def test_phabricator_clang_tidy_and_coverage( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.txt": [0], @@ -673,9 +667,8 @@ def test_phabricator_analyzers( api.comment = unittest.mock.Mock(return_value=True) # Always use the same setup, only varies the analyzers - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = {"test.cpp": [0, 41, 42, 43], "dom/test.cpp": [42]} revision.id = 52 reporter = PhabricatorReporter( @@ -759,9 +752,8 @@ def test_phabricator_clang_tidy_build_error( from code_review_bot import Level with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.cpp": [41, 42, 43] @@ -819,9 +811,8 @@ def test_full_file( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "xx.cpp": [123, 124, 125] @@ -883,9 +874,8 @@ def test_task_failures(mock_phabricator, phab, mock_try_task, mock_decision_task """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.id = 52 reporter = PhabricatorReporter({"analyzers": ["clang-tidy"]}, api=api) @@ -910,9 +900,8 @@ def test_extra_errors( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = {"path/to/file.py": [1, 2, 3]} revision.files = ["path/to/file.py"] revision.id = 52 @@ -1003,9 +992,8 @@ def test_phabricator_notices(mock_phabricator, phab, mock_try_task, mock_decisio """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.rst": [41, 42, 43], @@ -1053,9 +1041,8 @@ def test_phabricator_tgdiff(mock_phabricator, phab, mock_try_task, mock_decision """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.rst": [41, 42, 43], @@ -1091,9 +1078,8 @@ def test_phabricator_external_tidy( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "another_test.cpp": [41, 42, 43] @@ -1144,9 +1130,8 @@ def test_phabricator_newer_diff( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.txt": [0], @@ -1224,9 +1209,8 @@ def test_phabricator_former_diff_comparison( """ with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.txt": [0], @@ -1374,9 +1358,8 @@ def test_phabricator_before_after_comment( mock_taskcluster_config.secrets = {"BEFORE_AFTER_RATIO": 1} with mock_phabricator as api: - revision = PhabricatorRevision.from_try_task( - mock_try_task, mock_decision_task, api - ) + revision = Revision.from_try_task(mock_try_task, mock_decision_task, api) + assert isinstance(revision, PhabricatorRevision) revision.lines = { # Add dummy lines diff "test.txt": [0], From 58f7006996baeb8c2f0260e6ffcac6e3ffd957ea Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Wed, 11 Mar 2026 17:13:36 +0100 Subject: [PATCH 03/16] Github support fixes --- bot/code_review_bot/report/lando.py | 6 ++++-- bot/code_review_bot/revisions/github.py | 3 +-- bot/code_review_bot/workflow.py | 8 ++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/bot/code_review_bot/report/lando.py b/bot/code_review_bot/report/lando.py index 21ed73771..331a012b0 100644 --- a/bot/code_review_bot/report/lando.py +++ b/bot/code_review_bot/report/lando.py @@ -31,9 +31,11 @@ def publish(self, issues, revision, task_failures, links, reviewers): Send an email to administrators """ if not isinstance(revision, PhabricatorRevision): - raise NotImplementedError( - "Only Phabricator revisions are supported for now" + logger.warning( + "Lando publication only works with Phabricator revisions. Skipping.", + revision=revision, ) + return assert ( revision.phabricator_id and revision.phabricator_phid and revision.diff diff --git a/bot/code_review_bot/revisions/github.py b/bot/code_review_bot/revisions/github.py index 8e96fc3d7..66e8751af 100644 --- a/bot/code_review_bot/revisions/github.py +++ b/bot/code_review_bot/revisions/github.py @@ -47,8 +47,7 @@ def repository_slug(self): Generate a slug from the Github repository. This method copies the automatic slug creation in backend's RepositoryGetOrCreateField serializer field. """ - base_repo_url = self.pull_request.base.repo.html_url - parsed = urlparse(base_repo_url) + parsed = urlparse(self.repo_url) return parsed.path.lstrip("/").replace("/", "-") def load_patch(self): diff --git a/bot/code_review_bot/workflow.py b/bot/code_review_bot/workflow.py index d398abec6..a02e87c69 100644 --- a/bot/code_review_bot/workflow.py +++ b/bot/code_review_bot/workflow.py @@ -689,10 +689,10 @@ def update_status(self, revision, state): """ Update build status on HarborMaster """ - if not isinstance(revision, PhabricatorRevision): - raise NotImplementedError( - "Only Phabricator revisions are supported for now" - ) + if isinstance(revision, GithubRevision): + logger.warning("No Lando publication for Github yet") + return + assert isinstance(state, BuildState) # Skip github status update, as we rely on the github reporter for publication From af8dd7db5425495dc7b556243533d57724dc7829 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Thu, 2 Apr 2026 15:00:55 +0200 Subject: [PATCH 04/16] Use a + to slugify github repositories --- bot/code_review_bot/revisions/phabricator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/code_review_bot/revisions/phabricator.py b/bot/code_review_bot/revisions/phabricator.py index 975268600..757da41b4 100644 --- a/bot/code_review_bot/revisions/phabricator.py +++ b/bot/code_review_bot/revisions/phabricator.py @@ -85,7 +85,7 @@ def namespaces(self): def repo_slug(url): if url.startswith("https://hg.mozilla.org/"): url = url[23:] - return url.replace("/", "-") + return url.replace("/", "+") out = [] From dd278a0a3cbb6fac74fbf6ee143b1d3022e40695 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Mon, 20 Apr 2026 11:09:42 +0200 Subject: [PATCH 05/16] Update test --- bot/tests/test_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bot/tests/test_index.py b/bot/tests/test_index.py index fa5e6c032..2068c126f 100644 --- a/bot/tests/test_index.py +++ b/bot/tests/test_index.py @@ -95,8 +95,8 @@ def test_index_autoland( calls = mock_workflow.index_service.insertTask.call_args_list assert [c[0][0] for c in calls] == [ - "project.relman.test.code-review.head_repo.integration-autoland.deadbeef123", - "project.relman.test.code-review.head_repo.integration-autoland.deadbeef123.12345deadbeef", + "project.relman.test.code-review.head_repo.integration+autoland.deadbeef123", + "project.relman.test.code-review.head_repo.integration+autoland.deadbeef123.12345deadbeef", ] # Check all calls have the same shared payload From 2a7eea09c9a82ab87869a5ed5d2841af0e25fb40 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Tue, 21 Apr 2026 17:36:54 +0200 Subject: [PATCH 06/16] Use the information from the decision task --- bot/code_review_bot/revisions/base.py | 16 +++++++++------ bot/code_review_bot/revisions/github.py | 27 ++++++++++++------------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/bot/code_review_bot/revisions/base.py b/bot/code_review_bot/revisions/base.py index 492b9a35a..0ae3dbcc3 100644 --- a/bot/code_review_bot/revisions/base.py +++ b/bot/code_review_bot/revisions/base.py @@ -271,12 +271,16 @@ def from_try_task(try_task: dict, decision_task: dict, phabricator: PhabricatorA from code_review_bot.revisions.github import GithubRevision from code_review_bot.revisions.phabricator import PhabricatorRevision - # Load build target phid from the task env - code_review = try_task["extra"]["code-review"] - - if "github" in code_review: - return GithubRevision(**code_review["github"]) + task_env = decision_task.get("payload", {}).get("env", {}) + + if task_env.get("GECKO_REPOSITORY_TYPE") == "git": + return GithubRevision( + base_repo_url=task_env["GECKO_BASE_REPOSITORY"], + head_repo_url=task_env["GECKO_HEAD_REPOSITORY"], + pull_number=int(task_env["GECKO_PULL_REQUEST_NUMBER"]), + pull_head_sha=task_env["GECKO_HEAD_REV"], + ) else: return PhabricatorRevision.from_try_task( - code_review, decision_task, phabricator + try_task["extra"]["code-review"], decision_task, phabricator ) diff --git a/bot/code_review_bot/revisions/github.py b/bot/code_review_bot/revisions/github.py index 66e8751af..ce33fa23f 100644 --- a/bot/code_review_bot/revisions/github.py +++ b/bot/code_review_bot/revisions/github.py @@ -17,11 +17,11 @@ class GithubRevision(Revision): A revision from a github pull-request """ - def __init__(self, repo_url, branch, pull_number, pull_head_sha): + def __init__(self, base_repo_url, head_repo_url, pull_number, pull_head_sha): super().__init__() - self.repo_url = repo_url - self.branch = branch + self.base_repo_url = base_repo_url + self.head_repo_url = head_repo_url self.pull_number = pull_number self.pull_head_sha = pull_head_sha @@ -29,17 +29,17 @@ def __init__(self, repo_url, branch, pull_number, pull_head_sha): self.patch = self.load_patch() def __str__(self): - return f"Github pull request {self.repo_url} #{self.pull_number} ({self.pull_head_sha[:8]})" + return f"Github pull request {self.base_repo_url} #{self.pull_number} ({self.pull_head_sha[:8]})" def __repr__(self): - return f"GithubRevision repo_url={self.repo_url} branch={self.branch} pull_number={self.pull_number} sha={self.pull_head_sha}" + return f"GithubRevision base_repo={self.base_repo_url} head_repo={self.head_repo_url} pull_number={self.pull_number} sha={self.pull_head_sha}" @property def repo_name(self): """ Extract the name of the repository from its URL """ - return urlparse(self.repo_url).path.strip("/") + return urlparse(self.base_repo_url).path.strip("/") @property def repository_slug(self): @@ -47,7 +47,7 @@ def repository_slug(self): Generate a slug from the Github repository. This method copies the automatic slug creation in backend's RepositoryGetOrCreateField serializer field. """ - parsed = urlparse(self.repo_url) + parsed = urlparse(self.base_repo_url) return parsed.path.lstrip("/").replace("/", "-") def load_patch(self): @@ -55,7 +55,7 @@ def load_patch(self): Load the patch content for the current pull request HEAD """ # TODO: use specific sha - url = f"{self.repo_url}/pull/{self.pull_number}.diff" + url = f"{self.base_repo_url}/pull/{self.pull_number}.diff" logger.info("Loading github patch", url=url) resp = requests.get(url, allow_redirects=True) resp.raise_for_status() @@ -63,8 +63,8 @@ def load_patch(self): def as_dict(self): return { - "repo_url": self.repo_url, - "branch": self.branch, + "base_repo_url": self.base_repo_url, + "head_repo_url": self.head_repo_url, "pull_number": self.pull_number, "pull_head_sha": self.pull_head_sha, } @@ -79,14 +79,13 @@ def serialize(self): # TODO: Use the pull request information from the API "title": f"Issue {self.pull_number}", "bugzilla_id": None, - # TODO: Use the pull request information from the API - "base_repository": self.repo_url, - "head_repository": self.repo_url, + "base_repository": self.base_repo_url, + "head_repository": self.head_repo_url, } diff = { "provider": "github", "provider_id": self.pull_head_sha, "mercurial_hash": self.pull_head_sha, - "repository": self.repo_url, + "repository": self.base_repo_url, } return revision, diff From 9e920b3661a9e09d842132275a8d091184e66e28 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Tue, 21 Apr 2026 18:25:37 +0200 Subject: [PATCH 07/16] Replace - delimiter by + --- bot/code_review_bot/revisions/github.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/code_review_bot/revisions/github.py b/bot/code_review_bot/revisions/github.py index ce33fa23f..39ecea069 100644 --- a/bot/code_review_bot/revisions/github.py +++ b/bot/code_review_bot/revisions/github.py @@ -48,7 +48,7 @@ def repository_slug(self): This method copies the automatic slug creation in backend's RepositoryGetOrCreateField serializer field. """ parsed = urlparse(self.base_repo_url) - return parsed.path.lstrip("/").replace("/", "-") + return parsed.path.lstrip("/").replace("/", "+") def load_patch(self): """ From 09b050c79c1af81a5ce9b38a7a5c82fbfdbed7e6 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Tue, 21 Apr 2026 18:32:57 +0200 Subject: [PATCH 08/16] Replace + in the repository slug by a _ --- backend/code_review_backend/issues/serializers.py | 2 +- bot/code_review_bot/revisions/github.py | 2 +- bot/code_review_bot/revisions/phabricator.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/code_review_backend/issues/serializers.py b/backend/code_review_backend/issues/serializers.py index d2d49fb45..4fd0e3646 100644 --- a/backend/code_review_backend/issues/serializers.py +++ b/backend/code_review_backend/issues/serializers.py @@ -54,7 +54,7 @@ def to_internal_value(self, url): return # Github repositories uses a path like /, so we need to replace the # inner slash to have a slug usable within URLs (e.g. to list issues on a repo). - slug = parsed.path.lstrip("/").replace("/", "+") + slug = parsed.path.lstrip("/").replace("/", "_") try: repo, _ = self.get_queryset().get_or_create( url=url, defaults={"slug": slug} diff --git a/bot/code_review_bot/revisions/github.py b/bot/code_review_bot/revisions/github.py index 39ecea069..80e884a11 100644 --- a/bot/code_review_bot/revisions/github.py +++ b/bot/code_review_bot/revisions/github.py @@ -48,7 +48,7 @@ def repository_slug(self): This method copies the automatic slug creation in backend's RepositoryGetOrCreateField serializer field. """ parsed = urlparse(self.base_repo_url) - return parsed.path.lstrip("/").replace("/", "+") + return parsed.path.lstrip("/").replace("/", "_") def load_patch(self): """ diff --git a/bot/code_review_bot/revisions/phabricator.py b/bot/code_review_bot/revisions/phabricator.py index 757da41b4..5ff0b1959 100644 --- a/bot/code_review_bot/revisions/phabricator.py +++ b/bot/code_review_bot/revisions/phabricator.py @@ -85,7 +85,7 @@ def namespaces(self): def repo_slug(url): if url.startswith("https://hg.mozilla.org/"): url = url[23:] - return url.replace("/", "+") + return url.replace("/", "_") out = [] From dd7a1cc55625c24f81552ef76cd6052511553aa4 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Tue, 21 Apr 2026 18:46:41 +0200 Subject: [PATCH 09/16] Update tests --- bot/tests/conftest.py | 18 ++++++++++++++++++ bot/tests/test_index.py | 4 ++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/bot/tests/conftest.py b/bot/tests/conftest.py index caa4ae7fa..79d425460 100644 --- a/bot/tests/conftest.py +++ b/bot/tests/conftest.py @@ -290,6 +290,24 @@ def mock_try_task(): return {"extra": {"code-review": {"phabricator-diff": "PHID-HMBT-test"}}} +@pytest.fixture +def mock_github_decision_task(): + """ + Mock a decision task definition from a github revision + """ + return { + "payload": { + "env": { + "GECKO_REPOSITORY_TYPE": "git", + "GECKO_HEAD_REV": "a" * 40, + "GECKO_BASE_REPOSITORY": "https://github.tests.com/owner/repo-name", + "GECKO_HEAD_REPOSITORY": "https://github.tests.com/owner/repo-name", + "GECKO_PULL_REQUEST_NUMBER": 1, + } + } + } + + @pytest.fixture def mock_decision_task(): """ diff --git a/bot/tests/test_index.py b/bot/tests/test_index.py index 2068c126f..3575f19c7 100644 --- a/bot/tests/test_index.py +++ b/bot/tests/test_index.py @@ -95,8 +95,8 @@ def test_index_autoland( calls = mock_workflow.index_service.insertTask.call_args_list assert [c[0][0] for c in calls] == [ - "project.relman.test.code-review.head_repo.integration+autoland.deadbeef123", - "project.relman.test.code-review.head_repo.integration+autoland.deadbeef123.12345deadbeef", + "project.relman.test.code-review.head_repo.integration_autoland.deadbeef123", + "project.relman.test.code-review.head_repo.integration_autoland.deadbeef123.12345deadbeef", ] # Check all calls have the same shared payload From b8087cc22e76e2a39e114f66e637c67c392f3375 Mon Sep 17 00:00:00 2001 From: Bastien Abadie Date: Wed, 22 Apr 2026 11:14:03 +0200 Subject: [PATCH 10/16] Fix backend unit test --- backend/code_review_backend/issues/tests/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/code_review_backend/issues/tests/test_api.py b/backend/code_review_backend/issues/tests/test_api.py index fc1f96b10..4ab0558ce 100644 --- a/backend/code_review_backend/issues/tests/test_api.py +++ b/backend/code_review_backend/issues/tests/test_api.py @@ -143,7 +143,7 @@ def test_create_revision_creates_new_repo(self): self.assertEqual(response.status_code, status.HTTP_201_CREATED) new_repo = Repository.objects.get(url="http://hg.mozilla.org/a/new/repo") self.assertIsNotNone(new_repo) - self.assertEqual(new_repo.slug, "a+new+repo") + self.assertEqual(new_repo.slug, "a_new_repo") def test_create_diff(self): """ From e2efd961b2f9102f5c62839faf66a57fffe66efc Mon Sep 17 00:00:00 2001 From: Bastien Abadie Date: Wed, 22 Apr 2026 16:50:36 +0200 Subject: [PATCH 11/16] Share cache by default --- bot/code_review_bot/cli.py | 7 +------ bot/code_review_bot/config.py | 3 +++ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/bot/code_review_bot/cli.py b/bot/code_review_bot/cli.py index 8f590f6a0..c3f3a0d65 100644 --- a/bot/code_review_bot/cli.py +++ b/bot/code_review_bot/cli.py @@ -26,7 +26,7 @@ ) from code_review_bot.config import settings from code_review_bot.report import get_reporters -from code_review_bot.revisions import GithubRevision, PhabricatorRevision, Revision +from code_review_bot.revisions import PhabricatorRevision, Revision from code_review_bot.tools.libmozdata import setup as setup_libmozdata from code_review_bot.tools.log import init_logger from code_review_bot.workflow import Workflow @@ -213,11 +213,6 @@ def main(): ) return 1 - if isinstance(revision, GithubRevision): - assert ( - args.github_repository is not None - ), "Girhub revision analysis requires the --github-repository argument to be set" - # Run workflow according to source w = Workflow( reporters, diff --git a/bot/code_review_bot/config.py b/bot/code_review_bot/config.py index dfa54b2cb..64fe530a8 100644 --- a/bot/code_review_bot/config.py +++ b/bot/code_review_bot/config.py @@ -157,6 +157,9 @@ def build_conf(nb, repo): self.github_cache.exists() ), f"Github cache does not exist {self.github_cache}" logger.info("Using Github cache", path=self.mercurial_cache) + else: + # Fallback to mercurial cache to ease migration on production systems + self.github_cache = self.mercurial_cache def load_user_blacklist(self, usernames, phabricator_api): """ From 078ef44cbd80f73905b129a2c8b312e37d724db8 Mon Sep 17 00:00:00 2001 From: Bastien Abadie Date: Wed, 22 Apr 2026 16:58:09 +0200 Subject: [PATCH 12/16] Use same arguments as for phab rev --- bot/code_review_bot/revisions/base.py | 4 ++-- bot/code_review_bot/revisions/github.py | 26 ++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/bot/code_review_bot/revisions/base.py b/bot/code_review_bot/revisions/base.py index 0ae3dbcc3..e232ecc46 100644 --- a/bot/code_review_bot/revisions/base.py +++ b/bot/code_review_bot/revisions/base.py @@ -275,8 +275,8 @@ def from_try_task(try_task: dict, decision_task: dict, phabricator: PhabricatorA if task_env.get("GECKO_REPOSITORY_TYPE") == "git": return GithubRevision( - base_repo_url=task_env["GECKO_BASE_REPOSITORY"], - head_repo_url=task_env["GECKO_HEAD_REPOSITORY"], + base_repository=task_env["GECKO_BASE_REPOSITORY"], + head_repository=task_env["GECKO_HEAD_REPOSITORY"], pull_number=int(task_env["GECKO_PULL_REQUEST_NUMBER"]), pull_head_sha=task_env["GECKO_HEAD_REV"], ) diff --git a/bot/code_review_bot/revisions/github.py b/bot/code_review_bot/revisions/github.py index 80e884a11..24761f7cb 100644 --- a/bot/code_review_bot/revisions/github.py +++ b/bot/code_review_bot/revisions/github.py @@ -17,11 +17,11 @@ class GithubRevision(Revision): A revision from a github pull-request """ - def __init__(self, base_repo_url, head_repo_url, pull_number, pull_head_sha): + def __init__(self, base_repository, head_repository, pull_number, pull_head_sha): super().__init__() - self.base_repo_url = base_repo_url - self.head_repo_url = head_repo_url + self.base_repository = base_repository + self.head_repository = head_repository self.pull_number = pull_number self.pull_head_sha = pull_head_sha @@ -29,17 +29,17 @@ def __init__(self, base_repo_url, head_repo_url, pull_number, pull_head_sha): self.patch = self.load_patch() def __str__(self): - return f"Github pull request {self.base_repo_url} #{self.pull_number} ({self.pull_head_sha[:8]})" + return f"Github pull request {self.base_repository} #{self.pull_number} ({self.pull_head_sha[:8]})" def __repr__(self): - return f"GithubRevision base_repo={self.base_repo_url} head_repo={self.head_repo_url} pull_number={self.pull_number} sha={self.pull_head_sha}" + return f"GithubRevision base_repo={self.base_repository} head_repo={self.head_repository} pull_number={self.pull_number} sha={self.pull_head_sha}" @property def repo_name(self): """ Extract the name of the repository from its URL """ - return urlparse(self.base_repo_url).path.strip("/") + return urlparse(self.base_repository).path.strip("/") @property def repository_slug(self): @@ -47,7 +47,7 @@ def repository_slug(self): Generate a slug from the Github repository. This method copies the automatic slug creation in backend's RepositoryGetOrCreateField serializer field. """ - parsed = urlparse(self.base_repo_url) + parsed = urlparse(self.base_repository) return parsed.path.lstrip("/").replace("/", "_") def load_patch(self): @@ -55,7 +55,7 @@ def load_patch(self): Load the patch content for the current pull request HEAD """ # TODO: use specific sha - url = f"{self.base_repo_url}/pull/{self.pull_number}.diff" + url = f"{self.base_repository}/pull/{self.pull_number}.diff" logger.info("Loading github patch", url=url) resp = requests.get(url, allow_redirects=True) resp.raise_for_status() @@ -63,8 +63,8 @@ def load_patch(self): def as_dict(self): return { - "base_repo_url": self.base_repo_url, - "head_repo_url": self.head_repo_url, + "base_repository": self.base_repository, + "head_repository": self.head_repository, "pull_number": self.pull_number, "pull_head_sha": self.pull_head_sha, } @@ -79,13 +79,13 @@ def serialize(self): # TODO: Use the pull request information from the API "title": f"Issue {self.pull_number}", "bugzilla_id": None, - "base_repository": self.base_repo_url, - "head_repository": self.head_repo_url, + "base_repository": self.base_repository, + "head_repository": self.head_repository, } diff = { "provider": "github", "provider_id": self.pull_head_sha, "mercurial_hash": self.pull_head_sha, - "repository": self.base_repo_url, + "repository": self.base_repository, } return revision, diff From 1c19e05238130504024883745b965de7a8172865 Mon Sep 17 00:00:00 2001 From: Bastien Abadie Date: Wed, 22 Apr 2026 17:13:51 +0200 Subject: [PATCH 13/16] Store changesets too --- bot/code_review_bot/revisions/base.py | 3 ++- bot/code_review_bot/revisions/github.py | 25 ++++++++++++++++++------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/bot/code_review_bot/revisions/base.py b/bot/code_review_bot/revisions/base.py index e232ecc46..4b2d42bad 100644 --- a/bot/code_review_bot/revisions/base.py +++ b/bot/code_review_bot/revisions/base.py @@ -276,9 +276,10 @@ def from_try_task(try_task: dict, decision_task: dict, phabricator: PhabricatorA if task_env.get("GECKO_REPOSITORY_TYPE") == "git": return GithubRevision( base_repository=task_env["GECKO_BASE_REPOSITORY"], + base_changeset=task_env["GECKO_BASE_REV"], head_repository=task_env["GECKO_HEAD_REPOSITORY"], + head_changeset=task_env["GECKO_HEAD_REV"], pull_number=int(task_env["GECKO_PULL_REQUEST_NUMBER"]), - pull_head_sha=task_env["GECKO_HEAD_REV"], ) else: return PhabricatorRevision.from_try_task( diff --git a/bot/code_review_bot/revisions/github.py b/bot/code_review_bot/revisions/github.py index 24761f7cb..08d0ded92 100644 --- a/bot/code_review_bot/revisions/github.py +++ b/bot/code_review_bot/revisions/github.py @@ -17,22 +17,30 @@ class GithubRevision(Revision): A revision from a github pull-request """ - def __init__(self, base_repository, head_repository, pull_number, pull_head_sha): + def __init__( + self, + base_repository, + base_changeset, + head_repository, + head_changeset, + pull_number, + ): super().__init__() self.base_repository = base_repository + self.base_changeset = base_changeset self.head_repository = head_repository + self.head_changeset = head_changeset self.pull_number = pull_number - self.pull_head_sha = pull_head_sha # Load the patch from Github self.patch = self.load_patch() def __str__(self): - return f"Github pull request {self.base_repository} #{self.pull_number} ({self.pull_head_sha[:8]})" + return f"Github pull request {self.base_repository} #{self.pull_number} ({self.head_changeset[:8]})" def __repr__(self): - return f"GithubRevision base_repo={self.base_repository} head_repo={self.head_repository} pull_number={self.pull_number} sha={self.pull_head_sha}" + return f"GithubRevision base_repo={self.base_repository} head_repo={self.head_repository} pull_number={self.pull_number} head={self.head_changeset}" @property def repo_name(self): @@ -64,9 +72,10 @@ def load_patch(self): def as_dict(self): return { "base_repository": self.base_repository, + "base_changeset": self.base_changeset, "head_repository": self.head_repository, + "head_changeset": self.head_changeset, "pull_number": self.pull_number, - "pull_head_sha": self.pull_head_sha, } def serialize(self): @@ -80,12 +89,14 @@ def serialize(self): "title": f"Issue {self.pull_number}", "bugzilla_id": None, "base_repository": self.base_repository, + "base_changeset": self.base_changeset, "head_repository": self.head_repository, + "head_changeset": self.head_changeset, } diff = { "provider": "github", - "provider_id": self.pull_head_sha, - "mercurial_hash": self.pull_head_sha, + "provider_id": self.head_changeset, + "mercurial_hash": self.head_changeset, "repository": self.base_repository, } return revision, diff From b7d273cebf5aaf06fb7dbd52df0bd15f0c456483 Mon Sep 17 00:00:00 2001 From: Bastien Abadie Date: Wed, 22 Apr 2026 21:08:31 +0200 Subject: [PATCH 14/16] Clone git repo --- bot/code_review_bot/__init__.py | 4 +- bot/code_review_bot/config.py | 14 +++--- bot/code_review_bot/git.py | 75 +++++++++++++++++++++++++++++++++ bot/code_review_bot/github.py | 0 bot/code_review_bot/workflow.py | 67 +++++++++++++++++++---------- bot/requirements.txt | 1 + 6 files changed, 129 insertions(+), 32 deletions(-) create mode 100644 bot/code_review_bot/git.py delete mode 100644 bot/code_review_bot/github.py diff --git a/bot/code_review_bot/__init__.py b/bot/code_review_bot/__init__.py index 2c8d2b79c..46b8ef09c 100644 --- a/bot/code_review_bot/__init__.py +++ b/bot/code_review_bot/__init__.py @@ -201,9 +201,9 @@ def hash(self): local_repository = settings.mercurial_cache_checkout elif isinstance(self.revision, GithubRevision): assert ( - settings.github_cache + settings.git_cache ), "Github cache repository is mandatory to analyse a github revision" - local_repository = settings.github_cache + local_repository = settings.git_cache else: raise Exception(self.revision.__class__) raise NotImplementedError diff --git a/bot/code_review_bot/config.py b/bot/code_review_bot/config.py index 64fe530a8..1fc398c9a 100644 --- a/bot/code_review_bot/config.py +++ b/bot/code_review_bot/config.py @@ -58,7 +58,7 @@ def __init__(self): # Cache to store whole repositories self.mercurial_cache = None - self.github_cache = None + self.git_cache = None # SSH Key used to push on try self.ssh_key = None @@ -79,7 +79,7 @@ def setup( repositories, ssh_key=None, mercurial_cache=None, - github_cache=None, + git_cache=None, ): # Detect source from env if "TRY_TASK_ID" in os.environ and "TRY_TASK_GROUP_ID" in os.environ: @@ -151,15 +151,15 @@ def build_conf(nb, repo): self.ssh_key = ssh_key # Store github cache path - if github_cache is not None: - self.github_cache = Path(github_cache) + if git_cache is not None: + self.git_cache = Path(git_cache) assert ( - self.github_cache.exists() - ), f"Github cache does not exist {self.github_cache}" + self.git_cache.exists() + ), f"Github cache does not exist {self.git_cache}" logger.info("Using Github cache", path=self.mercurial_cache) else: # Fallback to mercurial cache to ease migration on production systems - self.github_cache = self.mercurial_cache + self.git_cache = self.mercurial_cache def load_user_blacklist(self, usernames, phabricator_api): """ diff --git a/bot/code_review_bot/git.py b/bot/code_review_bot/git.py new file mode 100644 index 000000000..a68957546 --- /dev/null +++ b/bot/code_review_bot/git.py @@ -0,0 +1,75 @@ +from urllib.parse import urlparse + +import structlog +from git import Repo + +logger = structlog.getLogger(__name__) + + +def repo_slug(repo_url): + """ + Build a slug from a github repository url + mozilla/firefox would become mozilla-firefox + """ + parts = urlparse(repo_url) + assert parts.netloc == "github.com", "Only github repositories are supported" + + path = parts.path.lstrip("/") + if path.endswith(".git"): + path = path[:-4] + + return path.replace("/", "-") + + +def git_clone(base_repository, head_repository, revision, destination): + """ + Clone a git repo at a specific revision in a directory + If the repo is already present, fetches and checkout + """ + + # Build slug + base_slug = repo_slug(base_repository) + head_slug = repo_slug(head_repository) + + logger.info(base_slug, head_slug) + + # Clone or fetch upstream + path = destination / base_slug + if path.exists() and (path / ".git").is_dir(): + logger.info("Use existing repo", path=path) + repo = Repo.init(path) + + # Make sure origin matches the url + origin = repo.remotes["origin"] + if origin.url != base_repository: + logger.info("Update remote origin", url=base_repository) + origin.set_url(base_repository) + + # Always update the references for base repo + logger.info("Fetch remote origin") + origin.fetch() + else: + logger.info("Clone git repository", url=base_repository, path=path) + repo = Repo.clone_from(base_repository, path) + + # Fetch head repository as a remote on top of base + try: + head = repo.remotes[head_slug] + + # Make sure head matches the url + if head.url != head_repository: + head.set_url(head_repository) + + except IndexError: + # Setup new remote + head = repo.create_remote(head_slug, head_repository) + + # Always fetch, as creating a remote does not fetch automatically + logger.info("Fetch remote head", url=head.url) + head.fetch() + + # Detach head to specified revision + logger.info("Checkout to head", revision=revision) + repo.head.reference = repo.commit(revision) + + return repo diff --git a/bot/code_review_bot/github.py b/bot/code_review_bot/github.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/bot/code_review_bot/workflow.py b/bot/code_review_bot/workflow.py index a02e87c69..67fbf602a 100644 --- a/bot/code_review_bot/workflow.py +++ b/bot/code_review_bot/workflow.py @@ -18,6 +18,7 @@ ) from code_review_bot.backend import BackendAPI from code_review_bot.config import settings +from code_review_bot.git import git_clone from code_review_bot.mercurial import MercurialWorker, Repository, robust_checkout from code_review_bot.report.debug import DebugReporter from code_review_bot.revisions import GithubRevision, PhabricatorRevision, Revision @@ -266,7 +267,7 @@ def start_analysis(self, revision): return # Cannot run without either mercurial or github cache configured - if not settings.mercurial_cache and not settings.github_cache: + if not settings.mercurial_cache and not settings.git_cache: raise Exception( "One of Mercurial cache or github cache must be configured to start analysis" ) @@ -363,32 +364,52 @@ def clone_repository(self, revision): Clone the repo locally when configured On production this should use a Taskcluster cache """ - if not isinstance(revision, PhabricatorRevision): - logger.info( - "Mercurial clone only supports Phabricator revisions, skipping." - ) - return - - if not settings.mercurial_cache: - logger.debug("Local clone not required") - return if self.clone_available: logger.debug("Local clone already setup") return - logger.info( - "Cloning revision to build issues", - repo=revision.base_repository, - changeset=revision.head_changeset, - dest=settings.mercurial_cache_checkout, - ) - robust_checkout( - repo_upstream_url=revision.base_repository, - repo_url=revision.head_repository, - revision=revision.head_changeset, - checkout_dir=settings.mercurial_cache_checkout, - sharebase_dir=settings.mercurial_cache_sharebase, - ) + if not settings.mercurial_cache and not settings.git_cache: + logger.info("Local clone not required") + return + + if isinstance(revision, PhabricatorRevision): + # Mercurial clone + if not settings.mercurial_cache: + raise Exception( + "Mercurial cache directory is not configured, cannot clone" + ) + logger.info( + "Cloning mercurial revision to build issues", + repo=revision.base_repository, + changeset=revision.head_changeset, + dest=settings.mercurial_cache_checkout, + ) + robust_checkout( + repo_upstream_url=revision.base_repository, + repo_url=revision.head_repository, + revision=revision.head_changeset, + checkout_dir=settings.mercurial_cache_checkout, + sharebase_dir=settings.mercurial_cache_sharebase, + ) + elif isinstance(revision, GithubRevision): + # Git clone + if not settings.git_cache: + raise Exception("Git cache directory is not configured, cannot clone") + logger.info( + "Cloning mercurial revision to build issues", + repo=revision.base_repository, + changeset=revision.head_changeset, + dest=settings.git_cache, + ) + git_clone( + base_repository=revision.base_repository, + head_repository=revision.head_repository, + revision=revision.head_changeset, + destination=settings.git_cache, + ) + else: + raise NotImplementedError + self.clone_available = True def publish(self, revision, issues, task_failures, notices, reviewers): diff --git a/bot/requirements.txt b/bot/requirements.txt index 63a693933..066728067 100644 --- a/bot/requirements.txt +++ b/bot/requirements.txt @@ -1,4 +1,5 @@ aiohttp<4 +gitpython==3.1.47 influxdb==5.3.2 libmozdata==0.2.12 python-hglib==2.6.2 From 78a3a77aafa4798df80bc03e9e7225517d6155ff Mon Sep 17 00:00:00 2001 From: Bastien Abadie Date: Wed, 22 Apr 2026 21:40:05 +0200 Subject: [PATCH 15/16] Fix local git path --- bot/code_review_bot/__init__.py | 2 +- bot/code_review_bot/git.py | 11 ++++++----- bot/code_review_bot/revisions/github.py | 5 ++--- tools/docker/bootstrap-mercurial.sh | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/bot/code_review_bot/__init__.py b/bot/code_review_bot/__init__.py index 46b8ef09c..a706255fb 100644 --- a/bot/code_review_bot/__init__.py +++ b/bot/code_review_bot/__init__.py @@ -203,7 +203,7 @@ def hash(self): assert ( settings.git_cache ), "Github cache repository is mandatory to analyse a github revision" - local_repository = settings.git_cache + local_repository = settings.git_cache / self.revision.repository_slug else: raise Exception(self.revision.__class__) raise NotImplementedError diff --git a/bot/code_review_bot/git.py b/bot/code_review_bot/git.py index a68957546..75a19e1f4 100644 --- a/bot/code_review_bot/git.py +++ b/bot/code_review_bot/git.py @@ -6,10 +6,11 @@ logger = structlog.getLogger(__name__) -def repo_slug(repo_url): +def build_repo_slug(repo_url): """ Build a slug from a github repository url - mozilla/firefox would become mozilla-firefox + mozilla-firefox/firefox would become mozilla-firefox_firefox + This method copies the automatic slug creation in backend's RepositoryGetOrCreateField serializer field. """ parts = urlparse(repo_url) assert parts.netloc == "github.com", "Only github repositories are supported" @@ -18,7 +19,7 @@ def repo_slug(repo_url): if path.endswith(".git"): path = path[:-4] - return path.replace("/", "-") + return path.replace("/", "_") def git_clone(base_repository, head_repository, revision, destination): @@ -28,8 +29,8 @@ def git_clone(base_repository, head_repository, revision, destination): """ # Build slug - base_slug = repo_slug(base_repository) - head_slug = repo_slug(head_repository) + base_slug = build_repo_slug(base_repository) + head_slug = build_repo_slug(head_repository) logger.info(base_slug, head_slug) diff --git a/bot/code_review_bot/revisions/github.py b/bot/code_review_bot/revisions/github.py index 08d0ded92..58953a0c8 100644 --- a/bot/code_review_bot/revisions/github.py +++ b/bot/code_review_bot/revisions/github.py @@ -7,6 +7,7 @@ import requests import structlog +from code_review_bot.git import build_repo_slug from code_review_bot.revisions import Revision logger = structlog.get_logger(__name__) @@ -53,10 +54,8 @@ def repo_name(self): def repository_slug(self): """ Generate a slug from the Github repository. - This method copies the automatic slug creation in backend's RepositoryGetOrCreateField serializer field. """ - parsed = urlparse(self.base_repository) - return parsed.path.lstrip("/").replace("/", "_") + return build_repo_slug(self.base_repository) def load_patch(self): """ diff --git a/tools/docker/bootstrap-mercurial.sh b/tools/docker/bootstrap-mercurial.sh index 3d4cf0ba1..c84c8489b 100755 --- a/tools/docker/bootstrap-mercurial.sh +++ b/tools/docker/bootstrap-mercurial.sh @@ -10,7 +10,7 @@ if [[ ! -f $HGRC ]]; then fi apt-get update -apt-get install --no-install-recommends -y curl python-dev-is-python3 gcc openssh-client libjemalloc2 +apt-get install --no-install-recommends -y curl python-dev-is-python3 gcc openssh-client libjemalloc2 git pip install --disable-pip-version-check --quiet --no-cache-dir mercurial==$MERCURIAL_VERSION From b9c02f5835a31f584aaa6e6cde2f343cffe15832 Mon Sep 17 00:00:00 2001 From: Valentin Rigal Date: Thu, 23 Apr 2026 10:40:15 +0200 Subject: [PATCH 16/16] Remove unused fixture --- bot/tests/conftest.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/bot/tests/conftest.py b/bot/tests/conftest.py index 79d425460..caa4ae7fa 100644 --- a/bot/tests/conftest.py +++ b/bot/tests/conftest.py @@ -290,24 +290,6 @@ def mock_try_task(): return {"extra": {"code-review": {"phabricator-diff": "PHID-HMBT-test"}}} -@pytest.fixture -def mock_github_decision_task(): - """ - Mock a decision task definition from a github revision - """ - return { - "payload": { - "env": { - "GECKO_REPOSITORY_TYPE": "git", - "GECKO_HEAD_REV": "a" * 40, - "GECKO_BASE_REPOSITORY": "https://github.tests.com/owner/repo-name", - "GECKO_HEAD_REPOSITORY": "https://github.tests.com/owner/repo-name", - "GECKO_PULL_REQUEST_NUMBER": 1, - } - } - } - - @pytest.fixture def mock_decision_task(): """