def query_data()

in treeherder/etl/management/commands/ingest.py [0:0]


def query_data(repo_meta, commit):
    """Find the right event base sha to get the right list of commits.

    This is not an issue in GithubPushTransformer because the PushEvent from Taskcluster
    already contains the data.

    Args:
        repo_meta: Mapping providing the "owner", "repo" and "branch" keys.
        commit: The revision (sha) to compare against the base.

    Returns:
        A ``(event_base_sha, commits)`` tuple. ``event_base_sha`` is the branch name
        when the first compare response carries no "merge_base_commit", otherwise the
        sha discovered by walking the merge base's parents. ``commits`` is a list of
        dicts with the keys "message", "author", "committer" and "id", built from the
        "commits" field of the last compare response.

    Raises:
        AssertionError: If a merge base exists but no suitable base sha could be
            determined from its parents.
    """
    branch = repo_meta["branch"]
    owner = repo_meta["owner"]
    repo = repo_meta["repo"]

    # This is used for the `compare` API. The "event.base.sha" is only contained in Pulse
    # events, thus, we need to determine the correct value
    event_base_sha = branch
    # First we try with the branch (e.g. `master`) being the base sha
    # e.g. https://api.github.com/repos/servo/servo/compare/master...1418c0555ff77e5a3d6cf0c6020ba92ece36be2e
    compare_response = github.compare_shas(owner, repo, branch, commit)
    merge_base_commit = compare_response.get("merge_base_commit")
    if merge_base_commit:
        committer_date = merge_base_commit["commit"]["committer"]["date"]
        # Since we don't use PushEvents that contain the "before" or "event.base.sha" fields [1]
        # we need to discover the right parent which existed in the base branch.
        # [1] https://github.com/taskcluster/taskcluster/blob/3dda0adf85619d18c5dcf255259f3e274d2be346/services/github/src/api.js#L55
        parents = merge_base_commit["parents"]
        if len(parents) == 1:
            parent = parents[0]
            parent_info = fetch_json(parent["url"])
            # All commits involved in a PR share the same committer's date
            if parent_info["commit"]["committer"]["date"] == committer_date:
                # Recursively find the forking parent; the commit list computed for the
                # parent is not needed here, only its base sha.
                event_base_sha, _ = query_data(repo_meta, parent["sha"])
            else:
                event_base_sha = parent["sha"]
        else:
            for parent in parents:
                parent_info = fetch_json(parent["url"])
                # All commits involved in a merge share the same committer's date
                if parent_info["commit"]["committer"]["date"] != committer_date:
                    event_base_sha = parent_info["sha"]
                    break

        # Make sure that the value has changed. This is raised explicitly (instead of
        # using an `assert` statement) so the check is not stripped when Python runs
        # with -O; the exception type is the same as before.
        if event_base_sha == branch:
            raise AssertionError(
                f"Unable to determine the event base sha for commit {commit}; "
                f"it is still the branch name {branch!r}"
            )
        logger.info("We have a new base: %s", event_base_sha)
        # When using the correct event_base_sha the "commits" field will be correct
        compare_response = github.compare_shas(owner, repo, event_base_sha, commit)

    commits = [
        {
            "message": entry["commit"]["message"],
            "author": entry["commit"]["author"],
            "committer": entry["commit"]["committer"],
            "id": entry["sha"],
        }
        for entry in compare_response["commits"]
    ]

    return event_base_sha, commits