bot/tools/fix_missing.py (132 lines of code) (raw):
#!/usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import re
import time
from datetime import datetime, timedelta
import requests
from taskcluster.helper import TaskclusterConfig
from code_review_bot.config import GetAppUserAgent
# Treeherder endpoint listing the pushes of the "try" project
# (queried by list_pushes).
TREEHERDER_PUSH_URL = "https://treeherder.mozilla.org/api/project/try/push/"
# Treeherder endpoint listing the jobs attached to a push
# (queried by find_task with a push_id).
TREEHERDER_JOBS_URL = "https://treeherder.mozilla.org/api/jobs/"
# Code review backend endpoint listing the diffs it already knows about
# (paginated; walked by list_diffs).
BACKEND_URL = "https://api.code-review.moz.tools/v1/diff/"
# Captures the numeric Phabricator revision id (the digits following "D")
# from the commit message of a push.
# NOTE(review): the dots of the host name are not escaped, so they match any
# character; harmless for this fixed prefix.
REGEX_PHAB_ID = re.compile(
r"try_task_config for https://phabricator.services.mozilla.com/D(\d+)"
)
# Phabricator Conduit endpoint used by phab_state to look up a revision.
PHABRICATOR_REVISION_URL = (
"https://phabricator.services.mozilla.com/api/differential.revision.search"
)
# Shared Taskcluster configuration: go() authenticates it and loads its
# secrets, and it provides the "queue" and "hooks" service clients.
taskcluster = TaskclusterConfig("https://firefox-ci-tc.services.mozilla.com")
def phab_state(revision_id):
    """Look up the status of a Phabricator revision.

    Posts a ``differential.revision.search`` request constrained to
    ``revision_id``, authenticated with the ``PHABRICATOR`` api key found in
    the loaded Taskcluster secrets (so secrets must be loaded beforehand).

    Returns the ``status`` field of the first search result; callers in this
    file read its ``"closed"`` key.

    Raises ``requests.HTTPError`` on an HTTP error status, and a lookup error
    (``KeyError`` / ``IndexError`` / ``TypeError``) when the response does not
    contain the expected result.
    """
    payload = {
        "constraints[ids][0]": revision_id,
        "api.token": taskcluster.secrets["PHABRICATOR"]["api_key"],
    }
    response = requests.post(PHABRICATOR_REVISION_URL, payload)
    response.raise_for_status()

    # A single id was requested, so the first hit is the revision itself
    first_hit = response.json()["result"]["data"][0]
    return first_hit["fields"]["status"]
def list_diffs(min_date, max_date):
    """Collect the diffs already known to the code review backend.

    Walks the paginated backend API starting at ``BACKEND_URL`` and keeps the
    diffs created in the ``[min_date, max_date)`` window.

    Diffs created at or after ``max_date`` are skipped; the walk stops at the
    first diff created before ``min_date``.
    NOTE(review): stopping early assumes the backend lists diffs newest
    first - confirm against the backend ordering.

    Parameters:
        min_date: naive datetime, inclusive lower bound.
        max_date: naive datetime, exclusive upper bound.

    Returns:
        A ``(revisions, updates)`` tuple where ``revisions`` is the list of
        mercurial hashes of the retained diffs and ``updates`` maps each
        Phabricator revision id to the most recent retained diff date.

    Raises:
        requests.HTTPError: when the backend answers with an error status.
    """
    url = BACKEND_URL
    revisions = []
    updates = {}

    # Stop when the backend reports no further page: the original looped
    # forever on `while True` and crashed requesting a missing "next" link
    # whenever the data ran out before reaching min_date.
    while url:
        resp = requests.get(url, headers=GetAppUserAgent())
        resp.raise_for_status()
        data = resp.json()

        for diff in data["results"]:
            # Limit to specific dates
            date = datetime.strptime(diff["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
            if date >= max_date:
                continue
            if date < min_date:
                return revisions, updates

            # Save revision
            revisions.append(diff["mercurial_hash"])

            # Save best (most recent) date for a Phabricator revision
            phab_id = diff["revision"]["id"]
            last_date = updates.get(phab_id)
            if last_date is None or last_date < date:
                updates[phab_id] = date

        # Move to next page
        url = data.get("next")

    return revisions, updates
def timestamp(date):
    """Convert a datetime into a float number of seconds since the epoch.

    The conversion goes through ``time.mktime``, which interprets the broken
    down time as *local* time; sub-second precision is dropped.
    NOTE(review): the script entry point passes dates derived from
    ``datetime.utcnow()``, so on a machine whose timezone is not UTC the
    result is shifted by the local UTC offset - confirm this is intended.
    """
    broken_down = date.timetuple()
    return time.mktime(broken_down)
def list_pushes(known_revisions, updates, min_date, max_date):
    """Yield the reviewbot try pushes that still need a code review run.

    Pages backwards through the Treeherder pushes authored by "reviewbot",
    starting at ``max_date`` and stopping at the first push older than
    ``min_date`` (or when Treeherder has no more pushes to return).

    A push is skipped when:
    - its revision is already in ``known_revisions``,
    - no Phabricator revision id can be found in its first commit message,
    - the backend has a more recent update for that Phabricator revision,
    - the Phabricator revision is closed.

    Parameters:
        known_revisions: iterable of mercurial hashes known by the backend.
        updates: dict mapping Phabricator revision ids to their latest
            known diff date.
        min_date: naive datetime, pushes older than this end the iteration.
        max_date: naive datetime, upper bound sent to Treeherder.

    Yields:
        The Treeherder push payload (dict) of every push to process.

    NOTE(review): push dates are built with ``datetime.fromtimestamp`` (local
    time) and compared with ``min_date`` / ``updates`` values that look like
    naive UTC - results may be shifted on a non-UTC machine; confirm.
    """
    params = {
        "count": 10,
        "author": "reviewbot",
        "push_timestamp__lte": timestamp(max_date),
    }
    # Membership is tested for every push: hash the revisions once
    known = frozenset(known_revisions)

    while True:
        resp = requests.get(TREEHERDER_PUSH_URL, params, headers=GetAppUserAgent())
        resp.raise_for_status()
        pushes = resp.json()["results"]

        if not pushes:
            # No older push available: without this guard the pagination
            # below would either fail on an unbound variable (first page) or
            # request the same page forever.
            return

        for push in pushes:
            rev = push["revision"]
            date = datetime.fromtimestamp(push["push_timestamp"])
            if date < min_date:
                return

            # Check if that push has been processed
            if rev in known:
                print(f"Skipping {rev}: already in backend")
                continue

            # Check if that phabricator revision needs an update
            commits = push["revisions"]
            match = REGEX_PHAB_ID.search(commits[0]["comments"]) if commits else None
            if match is None:
                print(f"No Phabricator revision found for {rev}")
                # Nothing to look up on Phabricator: skip this push instead
                # of crashing on match.group()
                continue
            phab_revision = int(match.group(1))
            update = updates.get(phab_revision)
            if update and update > date:
                print(f"Skipping {rev}: revision already got a review")
                continue

            # Check if revision is still open
            state = phab_state(phab_revision)
            if state["closed"] is True:
                print(
                    f"Skipping {rev}: revision is closed on Phabricator {phab_revision}"
                )
                continue

            # Process push
            yield push

        # Go to next page: everything strictly older than the last push seen
        params["push_timestamp__lte"] = pushes[-1]["push_timestamp"] - 1
def find_task(push_id):
    """Find the ``code-review-issues`` task attached to a Treeherder push.

    The jobs of the push are listed from Treeherder; the task id of the first
    job is used as the task group id, and that group is then listed from the
    Taskcluster queue because the code review task is not on Treeherder.

    Parameters:
        push_id: Treeherder push identifier.

    Returns:
        The Taskcluster task entry (dict with ``task`` and ``status`` keys as
        returned by ``listTaskGroup``) named "code-review-issues", or ``None``
        when the push has no job or no such task exists in the group.

    Raises:
        requests.HTTPError: when Treeherder answers with an error status.
    """
    # Find the task ids from Treeherder
    resp = requests.get(
        TREEHERDER_JOBS_URL, {"push_id": push_id}, headers=GetAppUserAgent()
    )
    resp.raise_for_status()
    data = resp.json()

    results = data["results"]
    if not results:
        # A push without any job cannot have a code review task. Report it as
        # "not found" rather than relying on an assert (stripped under -O),
        # so that one odd push does not abort the whole run.
        return None

    # Jobs come as value rows; pair the first one with the property names
    first_job = dict(zip(data["job_property_names"], results[0]))

    # Task group is first task id
    task_group_id = first_job["task_id"]

    # List task group from taskcluster as the code review task is not on treeherder
    # NOTE(review): only the first page returned by listTaskGroup is
    # inspected - confirm these task groups never span several pages.
    queue = taskcluster.get_service("queue")
    group = queue.listTaskGroup(task_group_id)

    # And find the code-review-issues task in that group !
    return next(
        (
            task
            for task in group["tasks"]
            if task["task"]["metadata"]["name"] == "code-review-issues"
        ),
        None,
    )
def go(min_date, max_date):
    """Re-trigger the code review hook for pushes missing from the backend.

    Steps:
    1. authenticate on Taskcluster and load the production runtime secrets
       (the ``PHABRICATOR`` secret is required),
    2. load the revisions and updates known by the backend between
       ``min_date`` and ``max_date``,
    3. for every push selected by ``list_pushes``, locate its code review
       task and trigger the ``project-relman/code-review-production`` hook
       with that task's identifiers.

    Progress is reported on standard output; nothing is returned.
    """
    # Taskcluster session: credentials first, then the secrets
    taskcluster.auth()
    taskcluster.load_secrets(
        "project/relman/code-review/runtime-production",
        prefixes=["common"],
        required=["PHABRICATOR"],
    )
    hooks = taskcluster.get_service("hooks")

    # What the code review backend already knows in that time window
    print(f"Loading known revisions from {min_date} to {max_date}")
    revisions, updates = list_diffs(min_date, max_date)
    print(f"Got {len(revisions)} mercurial revisions")
    print(f"Got {len(updates)} phab revision updates")

    # Every remaining push has no review in the backend and no newer update
    for push in list_pushes(revisions, updates, min_date, max_date):
        print(f"Triggering push {push['id']} @ {push['revision']}")

        task = find_task(push["id"])
        if task:
            # Hand the identifiers of the first run of the try task to the hook
            status = task["status"]
            payload = {
                "TRY_RUN_ID": status["runs"][0]["runId"],
                "TRY_TASK_GROUP_ID": status["taskGroupId"],
                "TRY_TASK_ID": status["taskId"],
            }
            print(f"Found code review task as {payload['TRY_TASK_ID']}")

            new_task = hooks.triggerHook(
                "project-relman", "code-review-production", payload
            )
            print(f" > Running in {new_task['status']['taskId']}")
        else:
            print("No code review task found !")
if __name__ == "__main__":
    # Process the pushes made between two days ago and two hours ago,
    # both bounds being naive UTC datetimes
    now = datetime.utcnow()
    window_start = now - timedelta(days=2)
    window_end = now - timedelta(seconds=2 * 3600)
    go(window_start, window_end)