# From libmozdata/patchanalysis.py
def get_commits_for_bug(bug):
    """Collect the mercurial commits that landed for a Bugzilla bug.

    Scans the bug's comments for landing notifications (inbound, central,
    fx-team), fetches each referenced revision's metadata, filters out
    backouts, merges and commits that belong to a different bug, and
    returns the surviving changesets.

    Args:
        bug: a Bugzilla bug dict; must provide ``bug["comments"]`` and
            ``bug["id"]``.

    Returns:
        A tuple ``(revs, backout_comments)`` where ``revs`` maps a
        12-char changeset hash to a dict with keys ``channel``,
        ``author_mercurial``, ``author_real_name``, ``creation_date``
        and ``reviewers``, and ``backout_comments`` is a set of the
        Bugzilla comment ids that announced backouts.
    """
    reviewer_pattern = re.compile(r"r=([a-zA-Z0-9._]+)")
    author_pattern = re.compile(r"<([^>]+)>")
    email_pattern = re.compile(r"<?([\w\-\._\+%]+@[\w\-\._\+%]+)>?")
    backout_pattern = re.compile(
        r"(?:backout|back out|backed out|backedout) (?:changeset )?([a-z0-9]{12,})"
    )
    bug_pattern = re.compile(r"[\t ]*bug[\t ]*([0-9]+)")
    # Hoisted out of the landing loop: these were previously (re)built on
    # every iteration.
    backout_changesets_pattern = re.compile(
        r"(?:backout|back out|backed out|backedout) changesets"
    )
    backout_keyword_pattern = re.compile(r"backout|back out|backed out|backedout")
    changeset_hash_pattern = re.compile(r"([a-z0-9]{12,})")

    landings = Bugzilla.get_landing_comments(
        bug["comments"], ["inbound", "central", "fx-team"]
    )
    revs = {}
    backed_out_revs = set()
    backout_comments = set()
    for landing in landings:
        rev = landing["revision"][:12]
        channel = landing["channel"]

        # TODO: No need to get the revision, we have everything in the raw format.
        # We can use pylib/mozautomation/mozautomation/commitparser.py from version-control-tools
        # Or maybe it's better this way, so we can avoid downloading a lot of changes when it's unneeded
        # to do so (e.g. for backouts or merges we only need the description).
        meta = hgmozilla.Revision.get_revision(channel, rev)
        if not meta:
            warnings.warn("Revision " + rev + " doesn't exist.", stacklevel=2)
            continue

        meta["desc"] = meta["desc"].lower()

        # Check if it was a backout: first look for "backed out changeset <hash>"
        # style messages naming the backed-out revisions explicitly.
        backout_revisions = set()
        for match in backout_pattern.finditer(meta["desc"]):
            backout_revisions.add(match.group(1)[:12])

        # TODO: Improve matching a backout of multiple changesets in a single line (e.g. bug 683280).
        # Fallback 1: a plural "backed out changesets" message — treat every
        # hash-looking token in the description as a backed-out revision.
        if not backout_revisions:
            match = backout_changesets_pattern.search(meta["desc"])
            if match:
                for match in changeset_hash_pattern.finditer(meta["desc"]):
                    backout_revisions.add(match.group(1)[:12])

        # Fallback 2: the description mentions a backout but names no hashes;
        # look at the parents' descriptions for the backed-out revisions.
        if not backout_revisions:
            match = backout_keyword_pattern.search(meta["desc"])
            if match:
                for parent in meta["parents"]:
                    for match in backout_pattern.finditer(
                        hgmozilla.Revision.get_revision(channel, parent)["desc"].lower()
                    ):
                        backout_revisions.add(match.group(1)[:12])

                # It's definitely a backout, but we couldn't find which revision was backed out.
                if not backout_revisions:
                    warnings.warn(
                        rev
                        + " looks like a backout, but we couldn't find which revision was backed out.",
                        stacklevel=2,
                    )
                # I wish we could assert instead of warn.
                # assert backout_revisions

        # Record the comment that announced a backout we haven't seen yet.
        if backout_revisions and not backout_revisions.issubset(backed_out_revs):
            backout_comments.add(landing["comment"]["id"])
            backed_out_revs.update(backout_revisions)

        # A backout changeset itself is not one of the bug's commits.
        if backout_revisions:
            continue

        bug_id_match = bug_pattern.search(meta["desc"])
        if bug_id_match:
            if int(bug_id_match.group(1)) != bug["id"]:
                warnings.warn(
                    "Revision "
                    + rev
                    + " is related to another bug ("
                    + bug_id_match.group(1)
                    + ").",
                    stacklevel=2,
                )
                continue

        # Skip merges (e.g. http://hg.mozilla.org/mozilla-central/rev/4ca898d7db5f from 914034)
        if not bug_id_match and "merge" in meta["desc"]:
            continue

        reviewers = set()
        for match in reviewer_pattern.finditer(meta["desc"]):
            reviewers.add(match.group(1))

        # The user field is usually "Real Name <email>"; fall back to a bare
        # email, then to the raw field if neither form matches.
        author_mercurial_match = author_pattern.search(meta["user"])
        if author_mercurial_match is None:
            author_mercurial_match = email_pattern.search(meta["user"])
        if author_mercurial_match is not None:
            author_mercurial = author_mercurial_match.group(1)
            author_real_name = meta["user"][: author_mercurial_match.start() - 1]
        else:
            author_mercurial = author_real_name = meta["user"]

        # Overwrite revisions from integration channels (inbound, fx-team).
        if rev not in revs or channel == "central":
            revs[rev] = {
                "channel": channel,
                "author_mercurial": author_mercurial,
                "author_real_name": author_real_name,
                "creation_date": meta["date"][0],
                "reviewers": reviewers,
            }

    # Remove backed out changesets
    for rev in backed_out_revs:
        if rev not in revs:
            warnings.warn("Revision " + rev + " was not found.", stacklevel=2)
        else:
            del revs[rev]

    return revs, backout_comments