in treeherder/etl/management/commands/ingest.py [0:0]
def query_data(repo_meta, commit):
    """Find the right event base sha to get the right list of commits.

    This is not an issue in GithubPushTransformer because the PushEvent from Taskcluster
    already contains the data.

    Args:
        repo_meta: Mapping with the "owner", "repo" and "branch" keys; they are
            forwarded to ``github.compare_shas``.
        commit: Revision used as the head of the comparison.

    Returns:
        A ``(event_base_sha, commits)`` tuple. ``commits`` is a list of dicts with
        the keys "message", "author", "committer" and "id", built from the
        "commits" field of the compare response. When the first compare response
        has no "merge_base_commit", ``event_base_sha`` is the branch name and the
        commits come from that first comparison.

    Raises:
        AssertionError: If a merge base was reported but no parent could be
            selected as the new base (the base would still be the branch name).
    """
    branch = repo_meta["branch"]
    # This is used for the `compare` API. The "event.base.sha" is only contained in Pulse events,
    # thus, we need to determine the correct value
    event_base_sha = branch
    # First we try with the branch (e.g. `master`) being the base sha
    # e.g. https://api.github.com/repos/servo/servo/compare/master...1418c0555ff77e5a3d6cf0c6020ba92ece36be2e
    compare_response = github.compare_shas(repo_meta["owner"], repo_meta["repo"], branch, commit)
    merge_base_commit = compare_response.get("merge_base_commit")
    if merge_base_commit:
        merge_base_date = merge_base_commit["commit"]["committer"]["date"]
        # Since we don't use PushEvents that contain the "before" or "event.base.sha" fields [1]
        # we need to discover the right parent which existed in the base branch.
        # [1] https://github.com/taskcluster/taskcluster/blob/3dda0adf85619d18c5dcf255259f3e274d2be346/services/github/src/api.js#L55
        parents = merge_base_commit["parents"]
        if len(parents) == 1:
            parent = parents[0]
            parent_date = fetch_json(parent["url"])["commit"]["committer"]["date"]
            # All commits involved in a PR share the same committer's date
            if merge_base_date == parent_date:
                # Recursively find the forking parent; only the base sha of the
                # nested call is needed, its commit list is discarded.
                event_base_sha, _ = query_data(repo_meta, parent["sha"])
            else:
                event_base_sha = parent["sha"]
        else:
            for parent in parents:
                parent_info = fetch_json(parent["url"])
                # All commits involved in a merge share the same committer's date,
                # so the first parent with a different date is taken as the base.
                if merge_base_date != parent_info["commit"]["committer"]["date"]:
                    event_base_sha = parent_info["sha"]
                    break
        # This is to make sure that the value has changed. Raised explicitly (rather
        # than via `assert`) so the check is not stripped when running with `-O`.
        if event_base_sha == branch:
            raise AssertionError(
                f"Unable to determine an event base sha other than {branch!r} for {commit}"
            )
        logger.info("We have a new base: %s", event_base_sha)
        # When using the correct event_base_sha the "commits" field will be correct
        compare_response = github.compare_shas(
            repo_meta["owner"], repo_meta["repo"], event_base_sha, commit
        )

    commits = [
        {
            "message": entry["commit"]["message"],
            "author": entry["commit"]["author"],
            "committer": entry["commit"]["committer"],
            "id": entry["sha"],
        }
        for entry in compare_response["commits"]
    ]
    return event_base_sha, commits