sync/commit.py (510 lines of code) (raw):
from __future__ import annotations
import os
import re
import subprocess
import git
from mozautomation import commitparser
from git.objects.commit import Commit as GitPythonCommit
from pygit2 import Commit as PyGit2Commit, Oid
from . import log
from .env import Environment
from .errors import AbortError
from .repos import cinnabar, cinnabar_map, pygit2_get
from typing import Dict
from git.repo.base import Repo
from typing import Any
from typing import Callable
from typing import Tuple
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from sync.upstream import UpstreamSync
MsgFilterFunc = Callable[[bytes], Tuple[bytes, Dict[str, str]]]
env = Environment()
logger = log.get_logger(__name__)
METADATA_RE = re.compile(br"([^:]+): (.*)")
def get_metadata(msg: bytes) -> dict[str, str]:
# Since this is data we add, we can be sure it's UTF-8 encoded
data = {}
for line in msg.splitlines():
if line:
m = METADATA_RE.match(line.strip())
if m:
key, value = m.groups()
data[key.decode("utf8")] = value.decode("utf8")
return data
def try_filter(msg: bytes) -> bytes:
# It turns out that the string "try:" is forbidden anywhere in gecko commits,
# because we (mistakenly) think that this always means it's a try string. So we insert
# a ZWSP which means that the try syntax regexp doesn't match, but the printable
# representation of the commit message doesn't change
try_re = re.compile(br"(\b)try:")
msg, _ = try_re.subn("\\1try\u200B:".encode(), msg)
return msg
def first_non_merge(commits: list[WptCommit]) -> WptCommit:
for item in commits:
if not item.is_merge:
return item
raise ValueError("All commits were merge commits")
def create_commit(repo: Repo, msg: bytes, **kwargs: Any) -> GitPythonCommit:
"""Commit the current index in repo, with msg as the message and additional kwargs
from kwargs
gitpython converts all arguments to strings in a way that doesn't allow
passing bytestrings in as arguments. But it's important to allow providing
a message that doesn't have a known encoding since we can't pre-validate that. So
this re-implements the internals of repo.git.execute to avoid the string conversion"""
prev_head = repo.head.commit
exec_kwargs = {k: v for k, v in kwargs.items() if k in git.cmd.execute_kwargs}
opts_kwargs = {k: v for k, v in kwargs.items() if k not in git.cmd.execute_kwargs}
cmd: list[str | bytes | None] = [repo.git.GIT_PYTHON_GIT_EXECUTABLE]
cmd.extend(repo.git._persistent_git_options)
cmd.append(b"commit")
cmd.append(b"--message=%s" % msg)
for name, value in opts_kwargs.items():
name_bytes = git.cmd.dashify(name).encode("utf8")
if isinstance(value, str):
value = value.encode("utf8")
assert value is None or isinstance(value, (bool, bytes))
if value is True:
dashes = b"-" if len(name) == 1 else b"--"
cmd.append(b"%s%s" % (dashes, name_bytes))
elif isinstance(value, bytes):
if len(name) == 1:
cmd.append(b"-%s" % name_bytes)
cmd.append(value)
else:
cmd.append(b"--%s=%s" % (name_bytes, value))
repo.git.execute(cmd, **exec_kwargs)
head = repo.head.commit
assert prev_head != head
return head
class GitNotes:
def __init__(self, commit: Commit) -> None:
self.commit = commit
self.pygit2_repo = pygit2_get(commit.repo)
self._data = self._read()
def _read(self) -> dict[str, str]:
try:
note_sha = self.pygit2_repo.lookup_note(self.commit.sha1).id
note_data = self.pygit2_repo[note_sha].data
except KeyError:
return {}
return get_metadata(note_data)
def __getitem__(self, key: str) -> str:
return self._data[key]
def __contains__(self, key: str) -> bool:
return key in self._data
def __setitem__(self, key: str, value: str) -> None:
self._data[key] = value
data = "\n".join("%s: %s" % item for item in self._data.items())
self.pygit2_repo.create_note(data,
self.pygit2_repo.default_signature,
self.pygit2_repo.default_signature,
self.commit.sha1,
"refs/notes/commits",
True)
class Commit:
def __init__(self, repo: Repo,
commit: str | Commit | GitPythonCommit | PyGit2Commit | Oid) -> None:
self.repo = repo
self.pygit2_repo = pygit2_get(repo)
self.cinnabar = cinnabar_map.get(repo)
_commit = None
_pygit2_commit = None
if hasattr(commit, "hexsha"):
assert isinstance(commit, GitPythonCommit)
sha1 = commit.hexsha
_commit = commit
elif isinstance(commit, Oid):
sha1 = str(commit)
elif hasattr(commit, "sha1"):
assert isinstance(commit, Commit)
sha1 = commit.sha1
elif isinstance(commit, (bytes, str)):
if isinstance(commit, bytes):
commit_text: str = commit.decode("ascii")
else:
commit_text = commit
commit_obj = self.pygit2_repo.revparse_single(commit_text)
sha1 = str(commit_obj.id)
elif hasattr(commit, "id"):
assert isinstance(commit, PyGit2Commit)
sha1 = commit.id
_pygit2_commit = commit
else:
raise ValueError("Unrecognised commit %r (type %s)" % (commit, type(commit)))
if sha1 not in self.pygit2_repo:
raise ValueError(f"Commit with SHA1 {sha1} not found")
self.sha1: str = sha1
self._commit = _commit
self._pygit2_commit = _pygit2_commit
self._notes: GitNotes | None = None
def __eq__(self, other: Any) -> bool:
if hasattr(other, "sha1"):
return self.sha1 == other.sha1
elif hasattr(other, "hexsha"):
return self.sha1 == other.hexsha
else:
return self.sha1 == other
return False
def __ne__(self, other: Any) -> bool:
return not self == other
@property
def commit(self) -> GitPythonCommit:
if self._commit is None:
self._commit = self.repo.commit(self.sha1)
return self._commit
@property
def pygit2_commit(self) -> PyGit2Commit:
if self._pygit2_commit is None:
self._pygit2_commit = self.pygit2_repo[self.sha1]
return self._pygit2_commit
@property
def notes(self) -> GitNotes:
if self._notes is None:
self._notes = GitNotes(self)
assert self._notes is not None
return self._notes
@property
def canonical_rev(self) -> str:
if self.cinnabar:
return self.cinnabar.git2hg(self.sha1)
return self.sha1
@property
def msg(self) -> bytes:
return self.pygit2_commit.raw_message
@property
def author(self) -> bytes:
author = self.pygit2_commit.author
name = author.raw_name
email = author.raw_email if author.email else b"unknown"
return b"%s <%s>" % (name, email)
@property
def email(self) -> bytes:
author = self.pygit2_commit.author
return author.raw_email
@property
def metadata(self) -> dict[str, str]:
return get_metadata(self.msg)
@property
def is_merge(self) -> bool:
return len(self.pygit2_commit.parent_ids) > 1
@classmethod
def create(cls,
repo: Repo,
msg: bytes,
metadata: dict[str, str] | None,
author: bytes | None = None,
amend: bool = False,
allow_empty: bool = False
) -> Commit:
msg = Commit.make_commit_msg(msg, metadata)
commit_kwargs: dict[str, Any] = {}
if amend:
commit_kwargs["amend"] = True
commit_kwargs["no_edit"] = True
else:
if author is not None:
commit_kwargs["author"] = author
if allow_empty:
commit_kwargs["allow_empty"] = True
commit = create_commit(repo, msg, **commit_kwargs)
return cls(repo, commit.hexsha)
@staticmethod
def make_commit_msg(msg: bytes, metadata: dict[str, str] | None) -> bytes:
if metadata:
metadata_text = "\n".join("%s: %s" % item for item in sorted(metadata.items()))
new_lines = b"\n\n" if not msg.endswith(b"\n") else b"\n"
msg = b"".join([msg, new_lines, metadata_text.encode("utf8")])
if isinstance(msg, str):
msg = msg.encode("utf8")
return msg
def is_empty(self, prefix: str | None = None) -> bool:
if len(self.pygit2_commit.parents) == 1:
# Fast-path for non-merge commits
diff = self.pygit2_repo.diff(self.pygit2_commit,
self.pygit2_commit.parents[0])
if not prefix:
# Empty if there are no deltas in the diff
return not any(diff.deltas)
for delta in diff.deltas:
if (delta.old_file.path.startswith(prefix) or
delta.new_file.path.startswith(prefix)):
return False
return True
return self.show(src_prefix=prefix,
format="",
patch=True).strip() == ""
def tags(self) -> list[str]:
return [item for item in self.repo.git.tag(points_at=self.sha1).split("\n")
if item.strip()]
def move(self,
dest_repo: Repo,
skip_empty: bool = True,
msg_filter: MsgFilterFunc | None = None,
metadata: dict[str, str] | None = None,
src_prefix: str | None = None,
dest_prefix: str | None = None,
amend: bool = False,
three_way: bool = True,
exclude: Any | None = None,
patch_fallback: bool = False,
allow_empty: bool = False,
) -> Commit | None:
return _apply_patch(self.show(src_prefix), self.msg, self.canonical_rev, dest_repo,
skip_empty, msg_filter, metadata, src_prefix, dest_prefix, amend,
three_way, author=self.author, exclude=exclude,
patch_fallback=patch_fallback, allow_empty=allow_empty)
def show(self, src_prefix: str | None = None, **kwargs: Any) -> bytes:
show_args: tuple[str, ...] = ()
if src_prefix:
show_args = ("--", src_prefix)
try:
show_kwargs: dict[str, Any] = {"binary": True, "stdout_as_string": False}
show_kwargs.update(kwargs)
return self.repo.git.show(self.sha1, *show_args, **show_kwargs) + b"\n"
except git.GitCommandError as e:
raise AbortError("git show failed") from e
def move_commits(repo: Repo,
revish: str,
message: bytes,
dest_repo: Repo,
skip_empty: bool = True,
msg_filter: MsgFilterFunc | None = None,
metadata: dict[str, str] | None = None,
src_prefix: str | None = None,
dest_prefix: str | None = None,
amend: bool = False,
three_way: bool = True,
rev_name: str | None = None,
author: bytes | None = None,
exclude: set[str] | None = None,
patch_fallback: bool = False,
allow_empty: bool = False,
) -> Commit | None:
if rev_name is None:
rev_name = revish
diff_args: tuple[str, ...] = ()
if src_prefix:
diff_args = ("--", src_prefix)
try:
patch = repo.git.diff(revish, binary=True, submodule="diff",
pretty="email", stdout_as_string=False, *diff_args) + b"\n"
logger.info("Created patch")
except git.GitCommandError as e:
raise AbortError("git diff failed") from e
return _apply_patch(patch, message, rev_name, dest_repo, skip_empty, msg_filter, metadata,
src_prefix, dest_prefix, amend, three_way, author=author, exclude=exclude,
patch_fallback=patch_fallback, allow_empty=allow_empty)
def _apply_patch(patch: bytes,
message: bytes,
rev_name: str,
dest_repo: Repo,
skip_empty: bool = True,
msg_filter: MsgFilterFunc | None = None,
metadata: dict[str, str] | None = None,
src_prefix: str | None = None,
dest_prefix: str | None = None,
amend: bool = False,
three_way: bool = True,
author: bytes | None = None,
exclude: set[str] | None = None,
patch_fallback: bool = False,
allow_empty: bool = False,
) -> Commit | None:
assert isinstance(patch, bytes)
if skip_empty and (not patch or patch.isspace() or
not any(line.startswith(b"diff ") for line in patch.splitlines())):
return None
if metadata is None:
metadata = {}
if msg_filter:
msg, metadata_extra = msg_filter(message)
else:
msg, metadata_extra = message, {}
if metadata_extra:
metadata.update(metadata_extra)
msg = Commit.make_commit_msg(msg, metadata)
working_dir = dest_repo.working_dir
assert working_dir is not None
with Store(dest_repo, rev_name + ".message", msg) as message_path:
strip_dirs = len(src_prefix.split("/")) + 1 if src_prefix else 1
with Store(dest_repo, rev_name + ".diff", patch) as patch_path:
# Without this tests were failing with "Index does not match"
dest_repo.git.update_index(refresh=True)
apply_kwargs: dict[str, Any] = {}
if dest_prefix:
apply_kwargs["directory"] = dest_prefix
if three_way:
apply_kwargs["3way"] = True
else:
apply_kwargs["reject"] = True
err_msg: str | None = None
try:
logger.info("Trying to apply patch")
dest_repo.git.apply(patch_path, index=True, binary=True,
p=strip_dirs, **apply_kwargs)
logger.info("Patch applied")
except git.GitCommandError as e:
err_msg = """git apply failed
{} returned status {}
Patch saved as :{}
Commit message saved as: {}
{}""".format(e.command, e.status, patch_path, message_path, e.stderr)
if patch_fallback and not dest_repo.is_dirty():
dest_repo.git.reset(hard=True)
cmd = ["patch", "-p%s" % strip_dirs, "-f", "-r=-",
"--no-backup-if-mismatch"]
if dest_prefix:
cmd.append("--directory=%s" % dest_prefix)
logger.info(" ".join(cmd))
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)
(stdout, stderr) = proc.communicate(patch)
if not proc.returncode == 0:
err_msg = ("%s\n\nPatch failed (status %i):\nstdout:\n%s\nstderr:\n%s" %
(err_msg,
proc.returncode,
stdout.decode("utf8", "replace") if stdout else "",
stderr.decode("utf8", "replace") if stderr else ""))
else:
err_msg = None
prefix = b"+++ "
paths = []
for line in patch.splitlines():
if line.startswith(prefix):
path_parts_bytes = line[len(prefix):].split(b"/")[strip_dirs:]
path_parts = [item.decode("utf8") for item in path_parts_bytes]
if dest_prefix:
path = os.path.join(dest_prefix, *path_parts)
else:
path = os.path.join(*path_parts)
paths.append(path)
dest_repo.git.add(*paths)
if err_msg is not None:
raise AbortError(err_msg)
if exclude:
exclude_paths = [os.path.join(dest_prefix, exclude_path)
if dest_prefix else exclude_path
for exclude_path in exclude]
exclude_paths = [item for item in exclude_paths
if os.path.exists(os.path.join(working_dir, item))]
try:
dest_repo.git.checkout("HEAD", *exclude_paths)
except git.GitCommandError as e:
logger.info(e)
try:
logger.info("Creating commit")
return Commit.create(dest_repo, msg, None, amend=amend, author=author,
allow_empty=allow_empty)
except git.GitCommandError as e:
if amend and e.status == 1 and "--allow-empty" in e.stdout:
logger.warning("Amending commit made it empty, resetting")
dest_repo.git.reset("HEAD^")
return None
elif not amend and e.status == 1 and "nothing added to commit" in e.stdout:
logger.warning("Commit added no changes to destination repo")
return None
else:
dest_repo.git.reset(hard=True)
raise
class GeckoCommit(Commit):
@property
def bug(self) -> int | None:
bugs = commitparser.parse_bugs(self.msg.splitlines()[0])
if len(bugs) > 1:
logger.warning("Got multiple bugs for commit %s: %s" %
(self.canonical_rev,
", ".join(str(item) for item in bugs)))
if not bugs:
return None
assert isinstance(bugs[0], int)
return bugs[0]
def has_wpt_changes(self) -> bool:
prefix = env.config["gecko"]["path"]["wpt"]
return not self.is_empty(prefix)
@property
def is_backout(self) -> bool:
return commitparser.is_backout(self.msg)
@property
def is_downstream(self) -> bool:
from . import downstream
return downstream.DownstreamSync.has_metadata(self.msg)
@property
def is_landing(self) -> bool:
from . import landing
return landing.LandingSync.has_metadata(self.msg)
def commits_backed_out(self) -> tuple[list[GeckoCommit], set[int]]:
# TODO: should bugs be int here
commits: list[GeckoCommit] = []
bugs: list[int] = []
if self.is_backout:
nodes_bugs = commitparser.parse_backouts(self.msg)
if nodes_bugs is None:
# We think this a backout, but have no idea what it backs out
# it's not clear how to handle that case so for now we pretend it isn't
# a backout
return commits, set(bugs)
nodes, bugs = nodes_bugs
# Assuming that all commits are listed.
for node in nodes:
git_sha = cinnabar(self.repo).hg2git(node.decode("ascii"))
commits.append(GeckoCommit(self.repo, git_sha))
return commits, set(bugs)
def wpt_commits_backed_out(self, exclude_downstream: bool = True,
exclude_landing: bool = True) -> tuple[list[GeckoCommit], set[int]]:
"""Get a list of all the wpt commits backed out by the current commit.
:param exclude_downstream: Exclude commits that were downstreamed
"""
all_commits, bugs = self.commits_backed_out()
commits = []
for commit in all_commits:
if (commit.has_wpt_changes() and
not (exclude_downstream and commit.is_downstream) and
not (exclude_landing and commit.is_landing)):
commits.append(commit)
return commits, set(bugs)
def landing_commits_backed_out(self) -> tuple[list[GeckoCommit], set[int]]:
all_commits, bugs = self.commits_backed_out()
commits = []
for commit in all_commits:
if commit.is_landing:
commits.append(commit)
return commits, set(bugs)
def upstream_sync(self, git_gecko: Repo, git_wpt: Repo) -> UpstreamSync | None:
from . import upstream
if "upstream-sync" in self.notes:
seq_id: int | None = None
bug_str, seq_id_str = self.notes["upstream-sync"].split(":", 1)
if seq_id_str == "":
seq_id = None
else:
seq_id = int(seq_id_str)
bug = int(bug_str)
syncs = upstream.UpstreamSync.load_by_obj(git_gecko, git_wpt, bug, seq_id=seq_id)
assert len(syncs) <= 1
if syncs:
sync = syncs.pop()
# TODO: Improve the annotations so that this is implied
assert isinstance(sync, upstream.UpstreamSync)
return sync
return None
def set_upstream_sync(self, sync: UpstreamSync) -> None:
from . import upstream
if not isinstance(sync, upstream.UpstreamSync):
raise ValueError
seq_id = sync.seq_id
if seq_id is None:
seq_id = ""
self.notes["upstream-sync"] = f"{sync.bug}:{seq_id}"
class WptCommit(Commit):
def pr(self) -> int | None:
if "wpt_pr" not in self.notes:
tags = [item.rsplit("_", 1)[1] for item in self.tags()
if item.startswith("merge_pr_")]
if tags and len(tags) == 1:
logger.info("Using tagged PR for commit %s" % self.sha1)
pr = tags[0]
else:
pr = str(env.gh_wpt.pr_for_commit(self.sha1))
if not pr:
pr == ""
logger.info("Setting PR to %s" % pr)
self.notes["wpt_pr"] = pr
pr = self.notes["wpt_pr"]
try:
return int(pr)
except (TypeError, ValueError):
return None
class Store:
"""Create a named file that is deleted if no exception is raised"""
def __init__(self, repo: Repo, name: str, data: bytes) -> None:
working_dir = repo.working_dir
assert working_dir is not None
self.path = os.path.join(working_dir, name)
self.data: bytes | None = data
assert isinstance(data, bytes)
def __enter__(self) -> str:
assert self.data is not None
with open(self.path, "wb") as f:
f.write(self.data)
self.data = None
return self.path
def __exit__(self,
type: type | None,
value: Exception | None,
traceback: Any | None,
) -> None:
if not type:
os.unlink(self.path)