def get_commits_for_bug()

in libmozdata/patchanalysis.py [0:0]


def get_commits_for_bug(bug):
    """Collect the landed commits of ``bug`` that were not backed out.

    The landing comments of the bug (for the "inbound", "central" and
    "fx-team" channels) are resolved to Mercurial revisions. Backouts,
    merges without a bug number and commits that mention another bug number
    are skipped, and every revision referenced by a backout is removed from
    the result.

    Args:
        bug: Bug dictionary; the ``"comments"`` and ``"id"`` keys are read.

    Returns:
        A ``(revs, backout_comments)`` tuple. ``revs`` maps the 12-character
        revision hash to a dict with the keys ``"channel"``,
        ``"author_mercurial"``, ``"author_real_name"``, ``"creation_date"``
        and ``"reviewers"`` (a set). ``backout_comments`` is the set of ids of
        the landing comments that introduced a not-yet-seen backout.

    Warns:
        Emits a warning (``warnings.warn``) when a revision cannot be fetched,
        when a backout's target cannot be determined, when a commit refers to
        another bug, and when a backed out revision is not among the
        collected ones.
    """
    reviewer_pattern = re.compile(r"r=([a-zA-Z0-9._]+)")
    author_pattern = re.compile(r"<([^>]+)>")
    email_pattern = re.compile(r"<?([\w\-\._\+%]+@[\w\-\._\+%]+)>?")
    backout_pattern = re.compile(
        r"(?:backout|back out|backed out|backedout) (?:changeset )?([a-z0-9]{12,})"
    )
    # Hoisted out of the loop: these are the same for every landing.
    multi_backout_pattern = re.compile(
        "(?:backout|back out|backed out|backedout) changesets"
    )
    any_backout_pattern = re.compile("backout|back out|backed out|backedout")
    changeset_pattern = re.compile(r"([a-z0-9]{12,})")
    bug_pattern = re.compile(r"[\t ]*bug[\t ]*([0-9]+)")

    landings = Bugzilla.get_landing_comments(
        bug["comments"], ["inbound", "central", "fx-team"]
    )
    revs = {}
    backed_out_revs = set()
    backout_comments = set()
    for landing in landings:
        rev = landing["revision"][:12]
        channel = landing["channel"]

        # TODO: No need to get the revision, we have everything in the raw format.
        #       We can use pylib/mozautomation/mozautomation/commitparser.py from version-control-tools
        # Or maybe it's better this way, so we can avoid downloading a lot of changes when it's unneeded
        # to do so (e.g. for backouts or merges we only need the description).
        meta = hgmozilla.Revision.get_revision(channel, rev)
        if not meta:
            warnings.warn("Revision {} doesn't exist.".format(rev), stacklevel=2)
            continue
        # All the patterns above are written for lower-case text. A local copy
        # is used so the fetched revision data is left untouched.
        desc = meta["desc"].lower()

        # Check if it was a backout: first try "backed out [changeset] <hash>".
        backout_revisions = {
            found.group(1)[:12] for found in backout_pattern.finditer(desc)
        }

        # TODO: Improve matching a backout of multiple changesets in a single line (e.g. bug 683280).
        # "backed out changesets ...": take every hash-looking token of the
        # description.
        if not backout_revisions and multi_backout_pattern.search(desc):
            backout_revisions.update(
                found.group(1)[:12] for found in changeset_pattern.finditer(desc)
            )

        # The description mentions a backout but names no changeset: look for
        # the backed out hashes in the descriptions of the parent revisions.
        if not backout_revisions and any_backout_pattern.search(desc):
            for parent in meta["parents"]:
                parent_meta = hgmozilla.Revision.get_revision(channel, parent)
                if not parent_meta:
                    # The parent could not be fetched; nothing to inspect.
                    continue
                backout_revisions.update(
                    found.group(1)[:12]
                    for found in backout_pattern.finditer(parent_meta["desc"].lower())
                )

            # It's definitely a backout, but we couldn't find which revision was backed out.
            if not backout_revisions:
                warnings.warn(
                    "{} looks like a backout, but we couldn't find which revision was backed out.".format(
                        rev
                    ),
                    stacklevel=2,
                )
            # I wish we could assert instead of warn.
            # assert backout_revisions

        if backout_revisions:
            # Only remember the comment when it backs out something new.
            if not backout_revisions.issubset(backed_out_revs):
                backout_comments.add(landing["comment"]["id"])
                backed_out_revs.update(backout_revisions)
            continue

        bug_id_match = bug_pattern.search(desc)
        if bug_id_match and int(bug_id_match.group(1)) != bug["id"]:
            warnings.warn(
                "Revision {} is related to another bug ({}).".format(
                    rev, bug_id_match.group(1)
                ),
                stacklevel=2,
            )
            continue

        # Skip merges (e.g. http://hg.mozilla.org/mozilla-central/rev/4ca898d7db5f from 914034)
        if not bug_id_match and "merge" in desc:
            continue

        # Reviewer names come from the lower-cased description.
        reviewers = {found.group(1) for found in reviewer_pattern.finditer(desc)}

        user = meta["user"]
        author_match = author_pattern.search(user)
        if author_match is None:
            author_match = email_pattern.search(user)
        if author_match is not None:
            author_mercurial = author_match.group(1)
            # The display name is whatever precedes the address. Slicing up to
            # the match start and stripping trailing whitespace (instead of
            # slicing to ``start() - 1``) keeps the name intact when there is
            # no separating space and avoids a negative slice end when the
            # address is at the very beginning of the string.
            author_real_name = user[: author_match.start()].rstrip()
            if not author_real_name:
                # No name part at all: use the address, as is done below when
                # no address can be found.
                author_real_name = author_mercurial
        else:
            author_mercurial = author_real_name = user

        # Overwrite revisions from integration channels (inbound, fx-team).
        if rev not in revs or channel == "central":
            revs[rev] = {
                "channel": channel,
                "author_mercurial": author_mercurial,
                "author_real_name": author_real_name,
                "creation_date": meta["date"][0],
                "reviewers": reviewers,
            }

    # Remove backed out changesets
    for rev in backed_out_revs:
        if rev not in revs:
            warnings.warn("Revision {} was not found.".format(rev), stacklevel=2)
        else:
            del revs[rev]

    return revs, backout_comments