Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backend/code_review_backend/issues/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def to_internal_value(self, url):
return
# Github repositories uses a path like <owner>/<slug>, so we need to replace the
# inner slash to have a slug usable within URLs (e.g. to list issues on a repo).
slug = parsed.path.lstrip("/").replace("/", "+")
slug = parsed.path.lstrip("/").replace("/", "_")
Comment thread
La0 marked this conversation as resolved.
try:
repo, _ = self.get_queryset().get_or_create(
url=url, defaults={"slug": slug}
Expand Down
2 changes: 1 addition & 1 deletion backend/code_review_backend/issues/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def test_create_revision_creates_new_repo(self):
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
new_repo = Repository.objects.get(url="http://hg.mozilla.org/a/new/repo")
self.assertIsNotNone(new_repo)
self.assertEqual(new_repo.slug, "a+new+repo")
self.assertEqual(new_repo.slug, "a_new_repo")

def test_create_diff(self):
"""
Expand Down
44 changes: 16 additions & 28 deletions bot/code_review_bot/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,41 +191,29 @@ def hash(self):
We make the assumption that the message does not contain the line number
If an error occurs reading the file content (locally or remotely), None is returned
"""
from code_review_bot.revisions import GithubRevision, PhabricatorRevision

assert self.revision is not None, "Missing revision"

local_repository = None
if isinstance(self.revision, PhabricatorRevision):
if settings.mercurial_cache_checkout:
local_repository = settings.mercurial_cache_checkout
elif isinstance(self.revision, GithubRevision):
assert (
settings.git_cache
), "Github cache repository is mandatory to analyse a github revision"
local_repository = settings.git_cache / self.revision.repository_slug
else:
raise Exception(self.revision.__class__)
Comment thread
Archaeopteryx marked this conversation as resolved.
raise NotImplementedError

# Build the hash only if the file is not autogenerated.
# An autogenerated file resides in the build directory that it has the
# format `obj-x86_64-pc-linux-gnu`
file_content = None
if "/obj-" not in self.path:
if settings.mercurial_cache_checkout:
logger.debug("Using the local repository to build issue's hash")
try:
with (settings.mercurial_cache_checkout / self.path).open() as f:
file_content = f.read()
except (FileNotFoundError, IsADirectoryError):
logger.warning(
"Failed to find issue's related file", path=self.path
)
file_content = None
else:
try:
# Load all the lines affected by the issue
file_content = self.revision.load_file(self.path)
except ValueError:
# Build the hash with an empty content in case the path is erroneous
file_content = None
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
logger.warning(
"Failed to download a file with an issue", path=self.path
)

# We still build the hash with empty content
file_content = None
else:
# When encountering another HTTP error, raise the issue
raise
file_content = self.revision.get_file_content(self.path, local_repository)

if file_content is None:
self._hash = None
Expand Down
57 changes: 24 additions & 33 deletions bot/code_review_bot/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from code_review_bot import taskcluster
from code_review_bot.config import GetAppUserAgent, settings
from code_review_bot.revisions import PhabricatorRevision
from code_review_bot.tasks.lint import MozLintIssue

logger = structlog.get_logger(__name__)
Expand Down Expand Up @@ -46,38 +47,31 @@ def publish_revision(self, revision):
logger.warn("Skipping revision publication on backend")
return

# Check the repositories are urls
for url in (revision.base_repository, revision.head_repository):
assert isinstance(url, str), "Repository must be a string"
res = urllib.parse.urlparse(url)
assert res.scheme and res.netloc, f"Repository {url} is not an url"

# Check the Mercurial changesets are strings
for changeset in (
revision.base_changeset,
revision.head_changeset,
):
assert isinstance(changeset, str), "Mercurial changeset must be a string"

# Create revision on backend if it does not exists
data = {
"phabricator_id": revision.phabricator_id,
"phabricator_phid": revision.phabricator_phid,
"title": revision.title,
"bugzilla_id": revision.bugzilla_id,
"base_repository": revision.base_repository,
"head_repository": revision.head_repository,
"base_changeset": revision.base_changeset,
"head_changeset": revision.head_changeset,
}

# Try to create the revision, or retrieve it in case it exists with that Phabricator ID.
elif isinstance(revision, PhabricatorRevision):
# Check the repositories are urls
for url in (revision.base_repository, revision.head_repository):
assert isinstance(url, str), "Repository must be a string"
res = urllib.parse.urlparse(url)
assert res.scheme and res.netloc, f"Repository {url} is not an url"

# Check the Mercurial changesets are strings
for changeset in (
revision.base_changeset,
revision.head_changeset,
):
assert isinstance(
changeset, str
), "Mercurial changeset must be a string"

revision_data, diff_data = revision.serialize()

# Try to create the revision, or retrieve it in case it exists with that provider and ID.
# The backend always returns a revisions, either a new one, or a pre-existing one
revision_url = "/v1/revision/"
auth = (self.username, self.password)
url_post = urllib.parse.urljoin(self.url, revision_url)
response = requests.post(
url_post, headers=GetAppUserAgent(), json=data, auth=auth
url_post, headers=GetAppUserAgent(), json=revision_data, auth=auth
)
if not response.ok:
logger.warn(f"Backend rejected the payload: {response.content}")
Expand All @@ -87,17 +81,14 @@ def publish_revision(self, revision):
revision.issues_url = backend_revision["issues_bulk_url"]
revision.id = backend_revision["id"]

# A revision may have no diff (e.g. Mozilla-central group tasks)
if not revision.diff_id:
# A revision may have no diff (e.g. Phabricator Mozilla-central group tasks)
if isinstance(revision, PhabricatorRevision) and not revision.diff_id:
return backend_revision

# Create diff attached to revision on backend
data = {
"id": revision.diff_id,
"phid": revision.diff_phid,
**diff_data,
"review_task_id": settings.taskcluster.task_id,
"mercurial_hash": revision.head_changeset,
"repository": revision.head_repository,
}
backend_diff = self.create(backend_revision["diffs_url"], data)

Expand Down
8 changes: 8 additions & 0 deletions bot/code_review_bot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ def parse_cli():
type=Path,
default=None,
)
parser.add_argument(
"--github-repository",
help="Optional path to a up-to-date github repository matching the analyzed revision.\n"
"This argument is required for Github reviusions in order to compute issues' hashes based on file content.",
type=Path,
default=None,
)
parser.add_argument("--taskcluster-client-id", help="Taskcluster Client ID")
parser.add_argument("--taskcluster-access-token", help="Taskcluster Access token")
return parser.parse_args()
Expand Down Expand Up @@ -116,6 +123,7 @@ def main():
taskcluster.secrets["repositories"],
taskcluster.secrets["ssh_key"],
args.mercurial_repository,
args.github_repository,
)

# Setup statistics
Expand Down
13 changes: 13 additions & 0 deletions bot/code_review_bot/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def __init__(self):

# Cache to store whole repositories
self.mercurial_cache = None
self.git_cache = None

# SSH Key used to push on try
self.ssh_key = None
Expand All @@ -78,6 +79,7 @@ def setup(
repositories,
ssh_key=None,
mercurial_cache=None,
git_cache=None,
):
# Detect source from env
if "TRY_TASK_ID" in os.environ and "TRY_TASK_GROUP_ID" in os.environ:
Expand Down Expand Up @@ -148,6 +150,17 @@ def build_conf(nb, repo):
# Save ssh key when mercurial cache is enabled
self.ssh_key = ssh_key

# Store github cache path
if git_cache is not None:
self.git_cache = Path(git_cache)
assert (
self.git_cache.exists()
), f"Github cache does not exist {self.git_cache}"
logger.info("Using Github cache", path=self.mercurial_cache)
else:
# Fallback to mercurial cache to ease migration on production systems
self.git_cache = self.mercurial_cache

def load_user_blacklist(self, usernames, phabricator_api):
"""
Load all black listed users from Phabricator API
Expand Down
76 changes: 76 additions & 0 deletions bot/code_review_bot/git.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from urllib.parse import urlparse

import structlog
from git import Repo

logger = structlog.getLogger(__name__)


def build_repo_slug(repo_url):
    """
    Derive a URL-safe slug from a Github repository URL.

    e.g. https://github.com/mozilla-firefox/firefox -> mozilla-firefox_firefox

    Mirrors the automatic slug creation done by the backend's
    RepositoryGetOrCreateField serializer field, so both sides agree
    on the slug for a given repository.
    """
    parsed = urlparse(repo_url)
    assert parsed.netloc == "github.com", "Only github repositories are supported"

    # Drop the leading slash and an optional trailing .git extension
    slug = parsed.path.lstrip("/")
    if slug.endswith(".git"):
        slug = slug[: -len(".git")]

    # Inner slashes (<owner>/<name>) become underscores to stay URL-safe
    return slug.replace("/", "_")


def git_clone(base_repository, head_repository, revision, destination):
    """
    Clone a git repository at a specific revision into `destination`.

    If a clone of the base repository already exists there, it is reused:
    its origin remote is re-pointed if needed and fetched. The head
    repository is added as an extra remote (named after its slug) on top of
    the base clone, then the requested revision is checked out (detached
    HEAD).

    Returns the GitPython Repo object.
    """
    # Slugs are used as the clone directory name and the head remote name
    base_slug = build_repo_slug(base_repository)
    head_slug = build_repo_slug(head_repository)
    # Fix: the previous call passed the slugs as positional arguments
    # (`logger.info(base_slug, head_slug)`), using the base slug as the log
    # event and dropping the head slug. Log them as structured fields.
    logger.info("Using repository slugs", base=base_slug, head=head_slug)

    # Clone or fetch upstream
    path = destination / base_slug
    if path.exists() and (path / ".git").is_dir():
        logger.info("Use existing repo", path=path)
        repo = Repo.init(path)

        # Make sure origin matches the url
        origin = repo.remotes["origin"]
        if origin.url != base_repository:
            logger.info("Update remote origin", url=base_repository)
            origin.set_url(base_repository)

        # Always update the references for base repo
        logger.info("Fetch remote origin")
        origin.fetch()
    else:
        logger.info("Clone git repository", url=base_repository, path=path)
        repo = Repo.clone_from(base_repository, path)

    # Fetch head repository as a remote on top of base
    try:
        head = repo.remotes[head_slug]

        # Make sure head matches the url
        if head.url != head_repository:
            head.set_url(head_repository)

    except IndexError:
        # GitPython raises IndexError (not KeyError) for a missing remote
        head = repo.create_remote(head_slug, head_repository)

    # Always fetch, as creating a remote does not fetch automatically
    logger.info("Fetch remote head", url=head.url)
    head.fetch()

    # Check out the requested revision with a detached HEAD.
    # Fix: assigning `repo.head.reference = repo.commit(revision)` only moves
    # the HEAD pointer and leaves the working tree untouched, so files read
    # from the checkout afterwards would be stale.
    logger.info("Checkout to head", revision=revision)
    repo.git.checkout(revision)

    return repo
Empty file removed bot/code_review_bot/github.py
Empty file.
6 changes: 4 additions & 2 deletions bot/code_review_bot/report/lando.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@ def publish(self, issues, revision, task_failures, links, reviewers):
Send an email to administrators
"""
if not isinstance(revision, PhabricatorRevision):
raise NotImplementedError(
"Only Phabricator revisions are supported for now"
logger.warning(
"Lando publication only works with Phabricator revisions. Skipping.",
revision=revision,
)
return

assert (
revision.phabricator_id and revision.phabricator_phid and revision.diff
Expand Down
3 changes: 2 additions & 1 deletion bot/code_review_bot/revisions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from code_review_bot.revisions.base import ImprovementPatch, Revision
from code_review_bot.revisions.github import GithubRevision
from code_review_bot.revisions.phabricator import PhabricatorRevision

# __all__ must contain *strings*, not the imported objects themselves:
# with class objects in the list, `from code_review_bot.revisions import *`
# raises `TypeError: Item in __all__ must be str`.
__all__ = ["ImprovementPatch", "Revision", "PhabricatorRevision", "GithubRevision"]
48 changes: 41 additions & 7 deletions bot/code_review_bot/revisions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import random
from abc import ABC
from datetime import timedelta
from pathlib import Path

import rs_parsepatch
import structlog
Expand Down Expand Up @@ -165,6 +166,25 @@ def contains(self, issue):
lines = set(range(issue.line, issue.line + issue.nb_lines))
return not lines.isdisjoint(modified_lines)

def get_file_content(
    self, file_path: str, local_cache_repository: Path | None = None
):
    """
    Read the content of a file affected by an issue, used to build the
    issue's hash.

    When `local_cache_repository` is provided, the file is read directly
    from that local checkout; otherwise it is fetched through the
    revision's `load_file` implementation.

    Returns the file content as a string, or None when the file cannot be
    read (missing path, directory, or erroneous path) so callers can still
    build a hash from empty content.
    """
    if local_cache_repository:
        logger.debug("Using the local repository to build issue's hash")
        try:
            with (local_cache_repository / file_path).open() as f:
                file_content = f.read()
        except (FileNotFoundError, IsADirectoryError):
            # Missing or non-regular file: fall back to empty content
            logger.warning("Failed to find issue's related file", path=file_path)
            file_content = None
    else:
        try:
            # NOTE(review): load_file is defined on concrete revision
            # classes; the pre-refactor code also handled an HTTP 404
            # (requests.exceptions.HTTPError) from remote loads — confirm
            # load_file cannot raise it, or that case now propagates.
            file_content = self.load_file(file_path)
        except ValueError:
            # The path is erroneous, consider as empty content
            file_content = None
    return file_content

@property
def has_clang_files(self):
"""
Expand Down Expand Up @@ -237,17 +257,31 @@ def as_dict(self):
"""
raise NotImplementedError

def serialize(self):
    """
    Outputs a tuple of dicts for revision and diff sent to backend

    Base classes only define the contract; concrete revision
    implementations must override this method.
    """
    raise NotImplementedError

@staticmethod
def from_try_task(try_task: dict, decision_task: dict, phabricator: PhabricatorAPI):
    """
    Load identifiers from Phabricator or Github, using the remote task description

    The decision task's environment tells us which provider produced the
    revision: a `git` GECKO_REPOSITORY_TYPE means a Github pull request,
    anything else falls back to a Phabricator revision.
    """
    # Imported here to avoid a circular import with the revision modules
    from code_review_bot.revisions.github import GithubRevision
    from code_review_bot.revisions.phabricator import PhabricatorRevision

    task_env = decision_task.get("payload", {}).get("env", {})

    # Fix: this span contained a leftover unconditional
    # `return PhabricatorRevision.from_try_task(...)` before the provider
    # dispatch below, which made the Github branch unreachable, and an
    # unused `code_review` local. Both removed.
    if task_env.get("GECKO_REPOSITORY_TYPE") == "git":
        return GithubRevision(
            base_repository=task_env["GECKO_BASE_REPOSITORY"],
            base_changeset=task_env["GECKO_BASE_REV"],
            head_repository=task_env["GECKO_HEAD_REPOSITORY"],
            head_changeset=task_env["GECKO_HEAD_REV"],
            pull_number=int(task_env["GECKO_PULL_REQUEST_NUMBER"]),
        )
    else:
        return PhabricatorRevision.from_try_task(
            try_task["extra"]["code-review"], decision_task, phabricator
        )
Loading