# bot/code_review_bot/backend.py
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import urllib.parse
import requests
import structlog
from code_review_bot import taskcluster
from code_review_bot.config import GetAppUserAgent, settings
from code_review_bot.tasks.lint import MozLintIssue
logger = structlog.get_logger(__name__)
class BackendAPI:
    """
    API client for our own code-review backend.

    Reads its configuration (url, username, password) from the `backend`
    Taskcluster secret. When any of these is missing, the client is
    disabled and the publication methods become no-ops.
    """

    def __init__(self):
        # An absent or partial secret simply disables backend storage.
        configuration = taskcluster.secrets.get("backend", {})
        self.url = configuration.get("url")
        self.username = configuration.get("username")
        self.password = configuration.get("password")
        if self.enabled:
            logger.info("Will use backend", url=self.url, user=self.username)
        else:
            logger.info("Skipping backend storage")

    @property
    def enabled(self):
        """True only when url, username and password are all configured."""
        return (
            self.url is not None
            and self.username is not None
            and self.password is not None
        )

    def publish_revision(self, revision):
        """
        Create a Revision on the backend.

        In case revision.diff_id exists, also create the revision's diff.
        Returns the backend's revision payload (a dict), or None when the
        backend is disabled or rejected the revision.

        Side effects: sets `revision.issues_url` and `revision.id` from the
        backend's response.

        Raises AssertionError when the revision's repositories are not valid
        URLs or its changesets are not strings.
        """
        if not self.enabled:
            logger.warning("Skipping revision publication on backend")
            return

        # Check the repositories are urls
        for url in (revision.base_repository, revision.head_repository):
            assert isinstance(url, str), "Repository must be a string"
            res = urllib.parse.urlparse(url)
            assert res.scheme and res.netloc, f"Repository {url} is not an url"

        # Check the Mercurial changesets are strings
        for changeset in (
            revision.base_changeset,
            revision.head_changeset,
        ):
            assert isinstance(changeset, str), "Mercurial changeset must be a string"

        # Create revision on backend if it does not exists
        data = {
            "phabricator_id": revision.phabricator_id,
            "phabricator_phid": revision.phabricator_phid,
            "title": revision.title,
            "bugzilla_id": revision.bugzilla_id,
            "base_repository": revision.base_repository,
            "head_repository": revision.head_repository,
            "base_changeset": revision.base_changeset,
            "head_changeset": revision.head_changeset,
        }

        # Try to create the revision, or retrieve it in case it exists with that Phabricator ID.
        # The backend always returns a revisions, either a new one, or a pre-existing one
        revision_url = "/v1/revision/"
        auth = (self.username, self.password)
        url_post = urllib.parse.urljoin(self.url, revision_url)
        response = requests.post(
            url_post, headers=GetAppUserAgent(), json=data, auth=auth
        )
        if not response.ok:
            logger.warning(f"Backend rejected the payload: {response.content}")
            return
        backend_revision = response.json()
        revision.issues_url = backend_revision["issues_bulk_url"]
        revision.id = backend_revision["id"]

        # A revision may have no diff (e.g. Mozilla-central group tasks)
        if not revision.diff_id:
            return backend_revision

        # Create diff attached to revision on backend.
        # The revision is returned even if the diff creation failed
        # (self.create returned None), as the revision itself exists.
        data = {
            "id": revision.diff_id,
            "phid": revision.diff_phid,
            "review_task_id": settings.taskcluster.task_id,
            "mercurial_hash": revision.head_changeset,
            "repository": revision.head_repository,
        }
        self.create(backend_revision["diffs_url"], data)
        return backend_revision

    def publish_issues(self, issues, revision):
        """
        Publish all issues on the backend in bulk.

        Issues are sent in chunks of `settings.bulk_issue_chunks` items to
        `revision.issues_url`. Issues without a hash, and rustfmt issues with
        path ".", are skipped. Each successfully published issue gets the
        backend's return value stored on `issue.on_backend`.

        Returns the number of issues actually sent to the backend, or None
        when the backend is disabled.

        Raises AssertionError when `revision.issues_url` is missing or the
        backend created fewer issues than were submitted in a chunk.
        """
        if not self.enabled:
            logger.warning("Skipping issues publication on backend")
            return

        published = 0
        assert (
            revision.issues_url is not None
        ), "Missing issues_url on the revision to publish issues in bulk."

        logger.info(f"Publishing issues in bulk of {settings.bulk_issue_chunks} items.")
        chunks = (
            issues[i : i + settings.bulk_issue_chunks]
            for i in range(0, len(issues), settings.bulk_issue_chunks)
        )
        for issues_chunk in chunks:
            # Store valid data as couples of (<issue>, <json_data>)
            valid_data = []
            # Build issues' payload for that given chunk
            for issue in issues_chunk:
                if (
                    isinstance(issue, MozLintIssue)
                    and issue.linter == "rust"
                    and issue.path == "."
                ):
                    # Silently ignore issues with path "." from rustfmt, as they cannot be published
                    # https://github.com/mozilla/code-review/issues/1577
                    continue
                if issue.hash is None:
                    logger.warning(
                        "Missing issue hash, cannot publish on backend",
                        issue=str(issue),
                    )
                    continue
                valid_data.append((issue, issue.as_dict()))

            if not valid_data:
                # May happen when a series of issues are missing a hash
                logger.warning(
                    "No issue is valid over an entire chunk",
                    head_repository=revision.head_repository,
                    head_changeset=revision.head_changeset,
                )
                continue

            response = self.create(
                revision.issues_url,
                {"issues": [json_data for _, json_data in valid_data]},
            )
            if response is None:
                # Backend rejected the payload, nothing more to do.
                continue

            created = response.get("issues")
            assert created and len(created) == len(valid_data)
            for (issue, _), return_value in zip(valid_data, created):
                # Set the returned value on each issue
                issue.on_backend = return_value
            published += len(valid_data)

        total = len(issues)
        if published < total:
            logger.warning(
                "Published a subset of issues", total=total, published=published
            )
        else:
            logger.info("Published all issues on backend", nb=published)
        return published

    def list_diff_issues(self, diff_id):
        """
        List issues for a given diff
        """
        return list(self.paginate(f"/v1/diff/{diff_id}/issues/"))

    def paginate(self, url_path):
        """
        Yield results from a paginated API one by one

        Follows the `next` links returned by the backend until exhausted.
        Raises requests.HTTPError on any non-success status.
        """
        auth = (self.username, self.password)
        next_url = urllib.parse.urljoin(self.url, url_path)

        # Iterate until there is no page left or a status error happen
        while next_url:
            resp = requests.get(next_url, auth=auth, headers=GetAppUserAgent())
            resp.raise_for_status()
            data = resp.json()
            yield from data.get("results", [])
            next_url = data.get("next")

    def create(self, url_path, data):
        """
        Make an authenticated POST request on the backend

        Check that the requested item does not already exists on the backend:
        when `data` carries an `id`, a GET on `<url_path><id>/` is tried first
        and any existing payload is returned instead of creating a duplicate.

        Returns the backend's JSON payload, or None when the POST failed.
        """
        assert self.enabled is True, "Backend API is not enabled"
        assert url_path.endswith("/")
        auth = (self.username, self.password)

        if "id" in data:
            # Check that the item does not already exists
            url_get = urllib.parse.urljoin(self.url, f"{url_path}{data['id']}/")
            response = requests.get(url_get, auth=auth, headers=GetAppUserAgent())
            if response.ok:
                logger.info("Found existing item on backend", url=url_get)
                return response.json()

        # Create the requested item
        url_post = urllib.parse.urljoin(self.url, url_path)
        response = requests.post(
            url_post, headers=GetAppUserAgent(), json=data, auth=auth
        )
        if not response.ok:
            logger.warning(f"Backend rejected the payload: {response.content}")
            return None
        out = response.json()
        logger.info("Created item on backend", url=url_post, id=out.get("id"))
        return out

    def list_repo_issues(
        self, repo_slug, date=None, revision_changeset=None, path=None
    ):
        """
        List issues detected from a specific repository.

        Optional `date` and `revision_changeset` parameters can be used to
        look for a specific revision (defaults to the revision closest to
        the given date). Only non-None parameters are forwarded as query
        string arguments.
        """
        params = {
            key: value
            for key, value in (
                ("path", path),
                ("date", date),
                ("revision_changeset", revision_changeset),
            )
            if value is not None
        }
        return list(
            self.paginate(f"/v1/issues/{repo_slug}/?{urllib.parse.urlencode(params)}")
        )