sync/notify/bugs.py (337 lines of code) (raw):
from __future__ import annotations
import json
import re
import os
import subprocess
from collections import defaultdict
import newrelic.agent
from .msg import detail_part
from .. import log
from ..bugcomponents import components_for_wpt_paths
from ..env import Environment
from ..lock import mut
from ..meta import Metadata
from ..projectutil import Mach
from typing import Iterable, Mapping, MutableMapping, Optional, Tuple, TYPE_CHECKING
from git.repo.base import Repo
if TYPE_CHECKING:
from sync.downstream import DownstreamSync
from sync.notify.results import Result, Results
from sync.notify.results import TestResult
from sync.notify.results import ResultsEntry
ResultsEntryStatus = Tuple[str, Optional[str], Result, Optional[str]]
logger = log.get_logger(__name__)
env = Environment()
postscript = """Note: this bug is for tracking fixing the issues and is not
owned by the wpt sync bot.
This bug is linked to the relevant tests by an annotation in
https://github.com/web-platform-tests/wpt-metadata. These annotations
can be edited using the wpt interop dashboard
https://jgraham.github.io/wptdash/
If this bug is split into multiple bugs, please also update the
annotations, otherwise we are unable to track which wpt issues are
already triaged. Resolving as duplicate or closing this issue should
cause the bot to automatically update or remove the annotation.
"""
def test_ids_to_paths(git_work: Repo, test_ids: list[str]) -> dict[str, list[str]]:
mach = Mach(git_work.working_dir)
data = {}
min_idx = 0
group = 100
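    # Query the manifest via mach in batches, presumably to keep the command
    # line within OS argument-length limits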
try:
while min_idx < len(test_ids):
data_str = mach.wpt_test_paths("--json", *test_ids[min_idx:min_idx + group])
data.update(json.loads(data_str))
min_idx += group
except subprocess.CalledProcessError:
newrelic.agent.record_exception()
# Fall back to a manual mapping of test ids to paths
data = fallback_test_ids_to_paths(test_ids)
return data
def fallback_test_ids_to_paths(test_ids: list[str]) -> dict:
"""Fallback for known rules mapping test_id to path, for cases where we
can't read the manifest"""
data = defaultdict(list)
    any_re = re.compile(r"(.*)\.any\.(?:[^.]*\.)?html$")
for test_id in test_ids:
prefix = env.config["gecko"]["path"]["wpt"]
suffix = test_id
if test_id.startswith("/_mozilla/"):
prefix = os.path.normpath(
os.path.join(env.config["gecko"]["path"]["wpt"], "..", "mozilla", "tests"))
suffix = suffix[len("/_mozilla"):]
m = any_re.match(test_id)
if m:
suffix = m.groups()[0] + ".any.js"
elif test_id.endswith(".worker.html"):
suffix = test_id.rsplit(".", 1)[0] + ".js"
elif test_id.endswith(".sharedworker.html"):
suffix = test_id.rsplit(".", 1)[0] + ".js"
elif test_id.endswith(".window.html"):
suffix = test_id.rsplit(".", 1)[0] + ".js"
path = prefix + suffix
data[path].append(test_id)
return data
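# A minimal sketch of the fallback mapping, using hypothetical test ids and
# assuming env.config["gecko"]["path"]["wpt"] == "testing/web-platform/tests":
#
#   fallback_test_ids_to_paths([
#       "/dom/nodes/Node.any.html",
#       "/dom/nodes/Node.any.worker.html",
#       "/_mozilla/dom/example.html",
#   ])
#   # => {"testing/web-platform/tests/dom/nodes/Node.any.js":
#   #         ["/dom/nodes/Node.any.html", "/dom/nodes/Node.any.worker.html"],
#   #     "testing/web-platform/mozilla/tests/dom/example.html":
#   #         ["/_mozilla/dom/example.html"]}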
def filter_test_failures(test: str, subtest: str, result: TestResult) -> bool:
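    """Decide whether a (test, subtest, result) entry should be reported as a
    new failure: results that already have a bug link are skipped, regressions
    and Firefox-only failures are always reported, and new non-passing results
    are reported only if they survive the additional checks below"""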
if result.has_link():
return False
if result.has_regression("firefox"):
return True
if result.is_browser_only_failure("firefox"):
return True
if result.has_new_non_passing("firefox"):
if not result.has_non_disabled():
return False
if not result.has_passing():
return False
if not result.is_github_only_failure():
return False
return True
return False
@mut('sync')
def for_sync(sync: DownstreamSync,
results: Results,
) -> Mapping[int, list[ResultsEntryStatus]]:
"""Create the bugs for followup work for test problems found in a sync.
This creates bugs that will be owned by the triage owner of the component
for any followup work that's revealed by the changes in a sync. Currently
    this covers crashes and certain kinds of failures (e.g. Firefox-only
    failures) that are not already known in the wpt metadata.
:returns: A dict {bug_id: bug_info} where bug_info is a list of test results
that are included in the bug, each represented as a tuple
(test_id, subtest, results, status)"""
rv: MutableMapping[int, list[ResultsEntryStatus]] = {}
newrelic.agent.record_custom_event("sync_bug", params={
"sync_bug": sync.bug,
})
new_crashes = list(results.iter_filter(lambda _test, _subtest, result:
(result.has_crash("firefox") and
not result.has_link(status="CRASH"))))
new_failures = list(results.iter_filter(filter_test_failures))
if not new_failures and not new_crashes:
newrelic.agent.record_custom_event("sync_bug_nothing_relevant", params={
"sync_bug": sync.bug,
})
return rv
existing = sync.notify_bugs
git_work = sync.gecko_worktree.get()
path_prefix = env.config["gecko"]["path"]["wpt"]
seen = set()
for key, test_results, bug_data, link_status, require_opt_in in [
("crash", new_crashes, bug_data_crash, "CRASH", False),
("failure", new_failures, bug_data_failure, None, True)]:
# Tests excluding those for which we already generated a bug
test_results = [item for item in test_results
if (item[0], item[1]) not in seen]
if not test_results:
continue
seen |= {(item[0], item[1]) for item in test_results}
test_ids = list({test_id for test_id, _subtest, _result in test_results})
test_id_by_path = test_ids_to_paths(git_work, test_ids)
test_path_by_id = {}
for path, ids in test_id_by_path.items():
for test_id in ids:
test_path_by_id[test_id] = os.path.relpath(path, path_prefix)
paths = set(test_path_by_id.values())
logger.info(f"Got paths {paths}")
components = components_for_wpt_paths(git_work, paths)
components_by_path = {}
for component, component_paths in components.items():
for path in component_paths:
components_by_path[path] = component
test_results_by_component = defaultdict(list)
for test_id, subtest, test_result in test_results:
test_path = test_path_by_id.get(test_id)
if not test_path:
                # The path can be missing if the test landed in an upstream
                # commit that hasn't reached this tree yet, so it appears in
                # the wpt.fyi data but not in the local checkout
continue
component = components_by_path[test_path]
test_results_by_component[component].append((test_id, subtest, test_result))
opt_in_components = {item.strip()
for item in
env.config["notify"].get("components", "").split(";")}
for component, test_results in test_results_by_component.items():
if component == "UNKNOWN":
                # Don't file a bug for tests that have no component
continue
component_key = f"{key} :: {component}"
if require_opt_in and component not in opt_in_components:
logger.info("Not filing bugs for component %s" % component)
newrelic.agent.record_custom_event("sync_bug_not_enabled", params={
"sync_bug": sync.bug,
"component": component
})
continue
if component_key not in existing:
product, component = component.split(" :: ")
summary, comment = bug_data(sync,
test_results,
results.treeherder_url,
results.wpt_sha)
depends = []
if sync.bug:
depends = [sync.bug]
bug_id = make_bug(summary, comment, product, component, depends)
sync.notify_bugs = sync.notify_bugs.copy(**{component_key: bug_id}) # type: ignore
newrelic.agent.record_custom_event("sync_bug_filing", params={
"sync_bug": sync.bug,
"component": component
})
else:
newrelic.agent.record_custom_event("sync_bug_existing", params={
"sync_bug": sync.bug,
"component": component
})
bug_id = existing[component_key]
rv[bug_id] = [item + (link_status,) for item in test_results]
return rv
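# The return value maps each bug (newly filed or preexisting) to the result
# entries it covers; a sketch with hypothetical values:
#   {1234567: [("/dom/example.html", None, <Result>, "CRASH"), ...]}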
class LengthCappedStringBuilder:
def __init__(self, max_length: int) -> None:
"""Builder for a string that must not exceed a given length"""
self.max_length = max_length
self.data: list[str] = []
self.current_length = 0
def append(self, other: str) -> bool:
"""Add a string the end of the data. Returns True if the add was
a success i.e. the new string is under the length limit, otherwise
False"""
len_other = len(other)
if len_other + self.current_length > self.max_length:
return False
self.data.append(other)
self.current_length += len_other
return True
def has_capacity(self, chars: int) -> bool:
"""Check if we have chars remaining capacity in the string"""
return self.current_length + chars <= self.max_length
def get(self) -> str:
"""Return the complete string"""
return "".join(self.data)
def split_id(test_id: str) -> tuple[str, ...]:
"""Convert a test id into a list of path parts, preserving the hash
and query fragments on the final part.
Unlike urlparse we don't split out the query or fragment, we want to
preserve the invariant that "/".join(split_id(test_id)) == test_id
:param test_id: The id of a test consisting of a url piece containing
a path and optionally a query and/or fragment
:returns: [id_parts] consisting of all the path parts split on /
with the final element retaining any non-path parts
"""
parts = test_id.split("/")
last = None
for i, part in enumerate(parts):
if "#" in part or "?" in part:
last = i
break
    if last is not None:
        name = "/".join(parts[last:])
        parts = parts[:last]
parts.append(name)
return tuple(parts)
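# For example (hypothetical id), split_id("/a/b/c.html?q=1#frag") returns
# ("", "a", "b", "c.html?q=1#frag"), so "/".join(...) round-trips to the
# original test id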
def get_common_prefix(test_ids: Iterable[str]
) -> tuple[list[tuple[str, ...]], tuple[str, ...]]:
"""Given a list of test ids, return the paths split into directory parts,
and the longest common prefix directory shared by all the inputs.
    :param test_ids: List of test ids
    :returns: ([split_name], common_prefix) The unique test ids split on /,
              and the longest path prefix shared by all test ids (excluding
              the filename parts)
    """
test_ids_list = list(test_ids)
common_prefix = split_id(test_ids_list[0])[:-1]
seen_names = set()
split_names = []
for test_id in test_ids_list:
split_name = split_id(test_id)
if split_name in seen_names:
continue
seen_names.add(split_name)
split_names.append(split_name)
common_prefix = os.path.commonprefix([item[:-1] for item in split_names]) # type: ignore
return split_names, common_prefix
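# A worked example with hypothetical ids:
#   get_common_prefix(["/css/css-grid/grid-1.html",
#                      "/css/css-grid/grid-2.html"])
#   # => ([("", "css", "css-grid", "grid-1.html"),
#   #      ("", "css", "css-grid", "grid-2.html")],
#   #     ("", "css", "css-grid"))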
def make_summary(test_results: list[ResultsEntry],
prefix: str,
max_length: int = 255,
max_tests: int = 3,
) -> str:
"""Construct a summary for the bugs based on the test results.
The approach here is to start building the string up using the
LengthCappedStringBuilder and when we get to an optional part check
if we have the capacity to add that part in, otherwise use an
alternative.
:param test_results: List of (test_id, subtest, result)
:param prefix: String prefix to use at the start of the summary
:param max_length: Maximum length of the summary to create
:param max_tests: Maximum number of tests names to include in the
output
:returns: String containing a constructed bug summary
"""
if len(prefix) > max_length:
raise ValueError("Prefix is too long")
# Start with the prefix
summary = LengthCappedStringBuilder(max_length)
summary.append(prefix)
# If we can fit some of the common path prefix, add that
split_names, common_test_prefix = get_common_prefix(item[0] for item in test_results)
joiner = " in "
if not summary.has_capacity(len(joiner) + len(common_test_prefix[0]) + 1):
return summary.get()
# Keep adding as much of the common path prefix as possible
summary.append(joiner)
for path_part in common_test_prefix:
if not summary.append("%s/" % path_part):
return summary.get()
test_names = ["/".join(item[len(common_test_prefix):]) for item in split_names]
    # If there's a single test name, add it and we're done
if len(test_names) == 1:
summary.append(test_names[0])
return summary.get()
# If there are multiple test names, add up to max_tests of those names
# and a suffix
prefix = " ["
# suffix is ", and N others]", N is at most len(test_results) so reserve that many
# characters
tests_remaining = len(test_names)
suffix_length = len(", and others]") + len(str(tests_remaining))
if summary.has_capacity(len(test_names[0]) + len(prefix) + suffix_length):
summary.append(prefix)
summary.append(test_names[0])
tests_remaining -= 1
for test_name in test_names[1:max_tests]:
if summary.has_capacity(2 + len(test_name) + suffix_length):
summary.append(", %s" % test_name)
tests_remaining -= 1
if tests_remaining > 0:
summary.append(", and %s others]" % tests_remaining)
else:
summary.append("]")
else:
        # If we couldn't fit any test names in, just add the number of tests
summary.append(" [%s tests]" % tests_remaining)
return summary.get()
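# A sketch of the summaries this produces, with hypothetical inputs: given
# ample space the output looks like
#   "New wpt failures in /css/css-grid/ [grid-1.html, grid-2.html,
#    grid-3.html, and 2 others]"
# while under a tight max_length it degrades through shorter forms down to
#   "New wpt failures in /css/css-grid/ [5 tests]"
# or just the bare prefix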
def bug_data_crash(sync: DownstreamSync,
test_results: list[ResultsEntry],
treeherder_url: str | None,
wpt_sha: str | None,
) -> tuple[str, str]:
summary = make_summary(test_results,
"New wpt crashes")
if treeherder_url is not None:
treeherder_text = "[Gecko CI (Treeherder)](%s)" % treeherder_url
else:
treeherder_text = "Missing results from treeherder"
if wpt_sha is not None:
wpt_text = "[GitHub PR Head](https://wpt.fyi/results/?sha=%s&label=pr_head)" % wpt_sha
else:
wpt_text = "Missing results from GitHub"
comment = """Syncing wpt \
[PR {pr_id}](https://github.com/web-platform-tests/wpt/pull/{pr_id}) \
found new crashes in CI
# Affected Tests
{details}
# CI Results
{treeherder_text}
{wpt_text}
# Notes
Getting the crash signature into these bug reports is a TODO; sorry
These updates will be on mozilla-central once bug {sync_bug_id} lands.
{postscript}""".format(pr_id=sync.pr,
details=detail_part(None, test_results, None, "head", False),
treeherder_text=treeherder_text,
wpt_text=wpt_text,
sync_bug_id=sync.bug,
postscript=postscript)
return summary, comment
def bug_data_failure(sync: DownstreamSync,
test_results: list[ResultsEntry],
treeherder_url: str | None,
wpt_sha: str | None,
) -> tuple[str, str]:
summary = make_summary(test_results,
"New wpt failures")
by_type = defaultdict(list)
for (test, subtest, result) in test_results:
if result.is_browser_only_failure("firefox"):
by_type["firefox-only"].append((test, subtest, result))
elif result.has_regression("firefox"):
by_type["regression"].append((test, subtest, result))
elif result.has_new_non_passing("firefox"):
by_type["new-non-passing"].append((test, subtest, result))
detail_msg = []
for (details_type, test_results, include_other_browser) in [
("Firefox-only failures", by_type["firefox-only"], False),
("Tests with a Worse Result After Changes", by_type["regression"], True),
("New Tests That Don't Pass", by_type["new-non-passing"], True)]:
if not test_results:
continue
part = detail_part(details_type, test_results, None, "head",
include_other_browser)
if part is not None:
detail_msg.append(part)
if treeherder_url is not None:
treeherder_text = "[Gecko CI (Treeherder)](%s)" % treeherder_url
else:
treeherder_text = "Missing results from treeherder"
if wpt_sha is not None:
wpt_text = "[GitHub PR Head](https://wpt.fyi/results/?sha=%s&label=pr_head)" % wpt_sha
else:
wpt_text = "Missing results from GitHub"
comment = """Syncing wpt \
[PR {pr_id}](https://github.com/web-platform-tests/wpt/pull/{pr_id}) \
found new untriaged test failures in CI
# Tests Affected
{details}
# CI Results
{treeherder_text}
{wpt_text}
# Notes
These updates will be on mozilla-central once bug {sync_bug_id} lands.
{postscript}""".format(pr_id=sync.pr,
details="\n".join(detail_msg),
treeherder_text=treeherder_text,
wpt_text=wpt_text,
sync_bug_id=sync.bug,
postscript=postscript)
return summary, comment
def make_bug(summary: str, comment: str, product: str, component: str,
depends: list[int]) -> int:
bug_id = env.bz.new(summary, comment, product, component,
whiteboard="[wpt]", bug_type="defect", assign_to_sync=False)
with env.bz.bug_ctx(bug_id) as bug:
for item in depends:
bug.add_depends(item)
return bug_id
@mut('sync')
def update_metadata(sync: DownstreamSync,
bugs: dict[int, list[ResultsEntryStatus]],
) -> None:
newrelic.agent.record_custom_event("sync_bug_metadata", params={
"sync_bug": sync.bug,
"bugs": [bug_id for bug_id, _ in bugs.items()]
})
# TODO: Ensure that the metadata is added to the meta repo
if not bugs:
return
metadata = Metadata.for_sync(sync, create_pr=True)
assert sync._lock is not None
with metadata.as_mut(sync._lock):
for bug_id, test_results in bugs.items():
for (test_id, subtest, results, status) in test_results:
metadata.link_bug(test_id,
env.bz.bugzilla_url(bug_id),
product="firefox",
subtest=subtest,
status=status)