sync/wptmeta/__init__.py (330 lines of code) (raw):
from __future__ import annotations
import os
from abc import ABCMeta, abstractmethod
from collections import OrderedDict, namedtuple
from copy import deepcopy
import urllib.parse
import yaml
from typing import Any, Iterator
"""Module for interacting with a web-platform-tests metadata repository"""
class DeleteTrackingList(list):
"""A list that holds a reference to any elements that are removed"""
def __init__(self, *args: Any, **kwargs: Any) -> None:
self._deleted: list[Any] = []
super().__init__(*args, **kwargs)
def __setitem__(self, index, value):
self._dirty = True
super().__setitem__(index, value)
def __setslice__(self, index0, index1, value):
self.deleted.extend(self[index0:index1])
super().__setslice__(index0, index1, value)
def __delitem__(self, index):
self.deleted.append(self[index]._initial_state)
super().__delitem__(index)
def __delslice__(self, index0, index1):
self.deleted.extend(self[index0:index1])
super().__delslice__(index0, index1)
def pop(self):
rv = super().pop()
self._deleted.append(rv)
return rv
def remove(self, item: Any) -> Any:
try:
return super().remove(item)
finally:
self._deleted.append(item)
def parse_test(test_id: str) -> tuple[str, str]:
id_parts = urllib.parse.urlsplit(test_id)
dir_name, test_file = id_parts.path.rsplit("/", 1)
if dir_name[0] == "/":
dir_name = dir_name[1:]
test_name = urllib.parse.urlunsplit(("", "", test_file, id_parts.query,
id_parts.fragment))
return dir_name, test_name
class Reader(metaclass=ABCMeta):
"""Class implementing read operations on paths"""
@abstractmethod
def read_path(self, rel_path: str) -> bytes:
"""Read the contents of `rel_path` as a bytestring
:param rel_path` Relative path to read
:returns: Bytes containing path contents
"""
pass
@abstractmethod
def exists(self, rel_path: str) -> bool:
"""Determine if `rel_path` is a valid path
:param rel_path` Relative path
:returns: Boolean indicating if `rel_path` is a valid path"""
pass
@abstractmethod
def walk(self, rel_path: str) -> Iterator[str]:
"""Iterator over all paths under rel_path containing an object
:param rel_path` Relative path
:returns: Iterator over path strings
"""
pass
class Writer(metaclass=ABCMeta):
"""Class implementing write operations on paths"""
@abstractmethod
def write(self, rel_path: str, data: bytes) -> None:
"""Write `data` to the object at `rel_path`
:param rel_path` Relative path to object
:param data: Bytes containing data to write
"""
pass
class FilesystemReader(Reader):
"""Reader implementation operating on filesystem files"""
def __init__(self, root):
self.root = root
def read_path(self, rel_path):
path = os.path.join(self.root, rel_path)
with open(path) as f:
return f.read()
def exists(self, rel_path):
return os.path.exists(os.path.join(self.root, rel_path))
def walk(self, rel_path):
base = os.path.join(self.root, rel_path)
for dir_path, dir_names, file_names in os.walk(base):
if "META.yml" in file_names:
yield os.path.relpath(dir_path, self.root)
class FilesystemWriter(Writer):
"""Writer implementation operating on filesystem files"""
def __init__(self, root):
self.root = root
def write(self, rel_path, data):
path = os.path.join(self.root, rel_path)
with open(path, "w") as f:
return f.write(data)
def metadata_directory(root):
reader = FilesystemReader(root)
writer = FilesystemWriter(root)
return WptMetadata(reader, writer)
class WptMetadata:
def __init__(self, reader: Reader, writer: Writer) -> None:
"""Object for working with a wpt-metadata tree
:param reader: Object implementing Reader
:param writer: Object implementing Writer"""
self.reader = reader
self.writer = writer
self.loaded: dict[str, MetaFile] = {}
def iter(self,
test_id: str | None = None,
product: str | None = None,
subtest: str | None = None,
status: str | None = None) -> Iterator[MetaEntry]:
"""Get the link metadata matching a specified set of conditions"""
if test_id is None:
dir_names = self.reader.walk("")
else:
assert test_id.startswith("/")
dir_name, _ = parse_test(test_id)
dir_names = iter([dir_name])
for dir_name in dir_names:
if dir_name not in self.loaded:
self.loaded[dir_name] = MetaFile(self, dir_name)
yield from self.loaded[dir_name].iter(product=product,
test_id=test_id,
subtest=subtest,
status=status)
def iterlinks(self,
test_id: str | None = None,
product: str | None = None,
subtest: str | None = None,
status: str | None = None) -> Iterator[MetaLink]:
"""Get the link metadata matching a specified set of conditions"""
for item in self.iter(test_id, product, subtest, status):
if isinstance(item, MetaLink):
yield item
def iterlabels(self,
test_id: str | None = None,
product: str | None = None,
subtest: str | None = None,
status: str | None = None) -> Iterator[MetaLabel]:
"""Get the label metadata matching a specified set of conditions"""
for item in self.iter(test_id, product, subtest, status):
if isinstance(item, MetaLabel):
yield item
def write(self) -> list[str]:
"""Write any updated metadata to the metadata tree"""
rv = []
for meta_file in self.loaded.values():
if meta_file.write():
rv.append(meta_file.rel_path)
return rv
def append_link(self, url: str, product: str, test_id: str, subtest: str | None = None,
status: str | None = None) -> None:
"""Add a link to the metadata tree
:param url: URL to link to
:param product: Product for which the link is relevant
:param test_id: Full test id for which the link is relevant
:param subtest: Subtest for which the link is relevant or
None to apply to parent/all tests
:param status: Result status for which the link is relevant or
None to apply to all statuses"""
assert test_id.startswith("/")
dir_name, test_name = parse_test(test_id)
if dir_name not in self.loaded:
self.loaded[dir_name] = MetaFile(self, dir_name)
meta_file = self.loaded[dir_name]
link = MetaLink(meta_file, test_id, url, product, subtest, status)
meta_file.links.append(link)
class MetaFile:
def __init__(self, owner: WptMetadata, dir_name: str) -> None:
"""Object representing a single META.yml file
This uses an unusual algorithm for updated; first we reread
the underlying data, then we apply changes that have been made
locally on top of the re-read data. This allows the changes to
be applied in the face of multiple writers without locking or
generating conflicts that require human resolution. However
the algorithm is not perfect; for example an entry that's
deleted remotely may be readded if it has local modifications;
that may or may not be correct depending on the situation.
:param owner: The parent WptMetadata object
:param dir_name: The relative path to the directory containing the
META.yml file
"""
self.owner = owner
self._file_data = None
self.dir_name = dir_name
dir_path = dir_name.replace("/", os.path.sep)
self.rel_path = os.path.join(dir_path, "META.yml")
self.links = DeleteTrackingList()
self._file_data = self._load_file(self.rel_path)
for link in self._file_data.get("links", []):
for result in link.get("results", []):
self.links.append(MetaEntry.from_file_data(self, link, result))
def _load_file(self, rel_path: str) -> dict[str, Any]:
if self.owner.reader.exists(rel_path):
data = yaml.safe_load(self.owner.reader.read_path(rel_path))
else:
data = {}
return data
def iter(self,
test_id: str | None = None,
product: str | None = None,
subtest: str | None = None,
status: str | None = None) -> Iterator[MetaEntry]:
"""Iterator over all links in the file, filtered by arguments"""
for item in self.links:
if ((product is None or
(item.product is not None and
item.product.startswith(product))) and
(test_id is None or
item.test_id == test_id) and
(subtest is None or
getattr(item, "subtest", None) == subtest) and
(status is None or
item.status == status)):
yield item
def write(self, reread: bool = True) -> bool:
"""Write the updated data to the underlying META.yml
:param reread: Reread the underlying data before applying changes
"""
data = self._get_data(reread)
self._update_data(data)
self.owner.writer.write(self.rel_path, yaml.safe_dump(data, encoding="utf8"))
self._file_data = data
self.links._deleted = []
for link in self.links:
link._initial_state = link.state
return True
def _get_data(self, reread: bool = True) -> dict[str, Any]:
if not reread:
assert self._file_data is not None
data = deepcopy(self._file_data)
else:
data = self._load_file(self.rel_path)
return data
def _update_data(self,
data: dict[str, Any],
) -> dict[str, Any]:
links_by_state = OrderedDict()
for item in data.get("links", []):
label = item.get("label")
url = item.get("url")
product = item.get("product")
for result in item["results"]:
test_id = "/{}/{}".format(self.dir_name, result.get("test"))
subtest = result.get("subtest")
status = result.get("status")
links_by_state[LinkState(label, url, product, test_id, subtest, status)] = (
LinkState(label, url, product, test_id, subtest, status))
# Remove deletions first so that delete and readd works
for item in self.links._deleted:
if item._initial_state in links_by_state:
del links_by_state[item._initial_state]
for item in self.links:
if item._initial_state in links_by_state:
links_by_state[item._initial_state] = item.state
else:
links_by_state[item.state] = item.state
by_link: OrderedDict[tuple[str | None, str | None, str],
list[dict[str, Any]]] = OrderedDict()
for link in links_by_state.values():
result = {}
test_id = link.test_id
if test_id is not None:
_, test = parse_test(test_id)
result["test"] = test
for prop in ["subtest", "status"]:
value = getattr(link, prop)
if value is not None:
result[prop] = value
key = (link.label, link.url, link.product)
if key not in by_link:
by_link[key] = []
by_link[key].append(result)
links = []
for (label, url, product), results in by_link.items():
link_data = {"results": results}
for link_key, value in [("label", label), ("url", url), ("product", product)]:
if value is not None:
link_data[link_key] = value
links.append(link_data)
data["links"] = links
return data
LinkState = namedtuple("LinkState", ["label", "url", "product", "test_id", "subtest", "status"])
class MetaEntry:
__metaclass__ = ABCMeta
def __init__(self, meta_file: MetaFile, test_id: str) -> None:
"""A single link object"""
assert test_id.startswith("/")
self.meta_file = meta_file
self.test_id = test_id
self._initial_state: LinkState | None = None
@staticmethod
def from_file_data(meta_file: MetaFile, link: dict[str, Any],
result: dict[str, str]) -> MetaLink | MetaEntry:
if "label" in link:
return MetaLabel.from_file_data(meta_file, link, result)
elif "url" in link:
return MetaLink.from_file_data(meta_file, link, result)
else:
raise ValueError("Unable to load metadata entry")
def __repr__(self):
return f"<{self.__class__.__name__} test_id: {self.test_id}>"
@property
@abstractmethod
def state(self) -> LinkState:
pass
def delete(self) -> None:
"""Remove the link from the owning file"""
self.meta_file.links.remove(self)
class MetaLabel(MetaEntry):
def __init__(self,
meta_file: MetaFile,
test_id: str,
label: str,
url: str | None,
product: str | None = None,
status: str | None = None,
) -> None:
"""A single link object"""
super().__init__(meta_file, test_id)
self.label = label
self.url = url
self.product = product
self.status = status
@classmethod
def from_file_data(cls, meta_file: MetaFile, link: dict[str, Any],
result: dict[str, str]) -> MetaLabel:
test_id = "/{}/{}".format(meta_file.dir_name, result["test"])
label = link["label"]
url = link.get("url")
product = link.get("product")
status = result.get("status")
self = cls(meta_file, test_id, label, url, product, status)
self._initial_state = self.state
return self
def __repr__(self):
base = super().__repr__()
return (f"{base[:-1]} label: {self.label} url: {self.url} "
f"product: {self.product} status: {self.status}>")
@property
def state(self) -> LinkState:
return LinkState(self.label,
self.url,
self.product,
self.test_id,
None,
self.status)
class MetaLink(MetaEntry):
def __init__(self,
meta_file: MetaFile,
test_id: str,
url: str,
product: str | None,
subtest: str | None = None,
status: str | None = None,
) -> None:
"""A single link object"""
super().__init__(meta_file, test_id)
self.url = url
self.product = product
self.subtest = subtest
self.status = status
@classmethod
def from_file_data(cls, meta_file: MetaFile, link: dict[str, Any],
result: dict[str, str]) -> MetaLink:
test_id = "/{}/{}".format(meta_file.dir_name, result["test"])
url = link["url"]
product = link.get("product")
status = result.get("status")
subtest = result.get("subtest")
self = cls(meta_file, test_id, url, product, subtest, status)
self._initial_state = self.state
return self
def __repr__(self):
base = super().__repr__()
return (f"{base[:-1]} url: {self.url} product: {self.product} "
f"status: {self.status} subtest: {self.subtest}>")
@property
def state(self) -> LinkState:
return LinkState(None,
self.url,
self.product,
self.test_id,
self.subtest,
self.status)