sync/meta.py (164 lines of code) (raw):

from __future__ import annotations import sys import git import newrelic import pygit2 from . import gh from . import log from . import repos from . import worktree from . import wptmeta from .env import Environment from .base import CommitBuilder, iter_tree from .lock import mut, MutGuard from typing import Iterable, Iterator, TYPE_CHECKING from git.repo.base import Repo if TYPE_CHECKING: from sync.downstream import DownstreamSync from sync.base import ProcessName from sync.lock import SyncLock from sync.wptmeta import MetaLink env = Environment() logger = log.get_logger(__name__) class GitReader(wptmeta.Reader): """Reader that works with a Git repository (without a worktree)""" def __init__(self, repo: Repo, ref: str = "origin/master") -> None: self.repo = repo self.repo.remotes.origin.fetch() self.pygit2_repo = repos.pygit2_get(repo) self.rev = self.pygit2_repo.revparse_single(ref) def exists(self, rel_path: str) -> bool: return rel_path in self.rev.tree def read_path(self, rel_path: str) -> bytes: entry = self.rev.tree[rel_path] return self.pygit2_repo[entry.id].read_raw() def walk(self, rel_path): for path, obj in iter_tree(self.pygit2_repo, rel_path, rev=self.rev): if isinstance(obj, pygit2.Blob) and obj.name == "META.yml": yield "/".join(path[:-1]) class GitWriter(wptmeta.Writer): """Writer that works with a Git repository (without a worktree)""" def __init__(self, builder: CommitBuilder) -> None: self.builder = builder def write(self, rel_path: str, data: bytes) -> None: self.builder.add_tree({rel_path: data}) class NullWriter(wptmeta.Writer): def write(self, rel_path): raise NotImplementedError class Metadata: def __init__(self, process_name: ProcessName, create_pr: bool = False, branch: str = "master" ) -> None: """Object for working with a wpt-metadata repository without requiring a worktree. Data is read directly from blobs and changes are represented as commits to the master branch. On update changes are automatically pushed to the remote origin, and conflicts are automatically handled with a retry algorithm. This implements the usual locking mechanism and writes occur when leaving the mutable scope. :param process_name: ProcessName object for the metadata. This will typically be the process name of an in-progress sync, used to lock the metadata for update. :param create_pr: Create a PR on the remote for the changes, rather than pushing directly to the branch :param branch: Branch to read and/or write to""" self.process_name = process_name self.create_pr = create_pr self.branch = branch self._lock = None meta_repo = repos.WptMetadata(env.config) self.repo = meta_repo.repo() self.pygit2_repo = repos.pygit2_get(self.repo) self.git_reader = GitReader(self.repo, "origin/%s" % self.branch) self.null_writer = NullWriter() self.metadata = wptmeta.WptMetadata(self.git_reader, self.null_writer) self.worktree = worktree.Worktree(self.repo, self.process_name) self.git_work = None def _push(self): raise NotImplementedError def as_mut(self, lock: SyncLock) -> MutGuard: return MutGuard(lock, self) @property def github(self) -> gh.GitHub: return gh.GitHub(env.config["web-platform-tests"]["github"]["token"], env.config["metadata"]["repo"]["url"]) @classmethod def for_sync(cls, sync: DownstreamSync, create_pr: bool = False) -> Metadata: return cls(sync.process_name, create_pr=create_pr) @property def lock_key(self) -> tuple[str, str]: return (self.process_name.subtype, self.process_name.obj_id) def exit_mut(self) -> None: ref_name = self.process_name.path() message = "Gecko sync update" retry = 0 MAX_RETRY = 5 err = None while retry < MAX_RETRY: newrelic.agent.record_custom_event("metadata_update", params={}) self.repo.remotes.origin.fetch() self.pygit2_repo.create_reference(ref_name, self.pygit2_repo.revparse_single( "origin/%s" % self.branch).id, True) commit_builder = CommitBuilder(self.repo, message, ref=ref_name) with commit_builder as builder: self.metadata.writer = GitWriter(builder) self.metadata.write() assert commit_builder.commit is not None if not commit_builder.commit.is_empty(): logger.info("Pushing metadata commit %s" % commit_builder.commit.sha1) remote_ref = self.get_remote_ref() try: self.repo.remotes.origin.push(f"{ref_name}:refs/heads/{remote_ref}") except git.GitCommandError: err = sys.exc_info() else: if self.create_pr: self.github.create_pull(message, "Update from bug %s" % self.process_name.obj_id, self.branch, remote_ref) err = None break retry += 1 else: break if err: newrelic.agent.record_exception(*err, params={ "ref_name": ref_name }) else: self.pygit2_repo.references.delete(ref_name) self.metadata.writer = NullWriter() def get_remote_ref(self) -> str: if not self.create_pr: return self.branch base_ref_name = "gecko/%s" % self.process_name.path().replace("/", "-") ref_name = base_ref_name prefix = "refs/remotes/origin/" count = 0 path = prefix + ref_name while path in self.pygit2_repo.references: count += 1 ref_name = f"{base_ref_name}-{count}" path = prefix + ref_name return ref_name @mut() def link_bug(self, test_id: str, bug_url: str, product: str = "firefox", subtest: str | None = None, status: str | None = None ) -> None: """Add a link to a bug to the metadata :param test_id: id of the test for which the link applies :param bug_url: url of the bug to link to "param product: product for which the link applies :param subtest: optional subtest for which the link applies :param status: optional status for which the link applies""" self.metadata.append_link(bug_url, product=product, test_id=test_id, subtest=subtest, status=status) def iter_bug_links(self, test_id: str, product: str = "firefox", prefixes: Iterable[str] | None = None, subtest: str | None = None, status: str | None = None) -> Iterator[MetaLink]: if prefixes is None: prefixes = (env.bz.bz_url, "https://github.com/wpt/web-platform-tests") for item in self.metadata.iterlinks(test_id=test_id, product=product, subtest=subtest, status=status): if any(item.url.startswith(prefix) for prefix in prefixes): yield item