in treeherder/model/error_summary.py [0:0]
def get_error_search_term_and_path(error_line):
    """
    Derive search terms (and a test path) from a single failure line.

    The aim is a term that yields meaningful results when used in a FTS
    query, rather than one that is too generic or too long to match.

    Returns ``None`` when ``error_line`` is empty. Otherwise returns a dict:

    * ``"search_term"``: a list holding the chosen term, followed by the part
      of it before the first ``?`` when it contains one. When no suitable
      term is found the list is ``[None]``.
    * ``"path_end"``: the cleaned test name/path token for non-leak lines in
      the pipe-delimited format, else ``None``.
    """
    if not error_line:
        return None

    candidate = None
    path_end = None

    # This is strongly inspired by
    # https://hg.mozilla.org/webtools/tbpl/file/tip/php/inc/AnnotatedSummaryGenerator.php#l73
    parts = error_line.split(" | ")
    if len(parts) >= 3:
        # Normally "FAILURE-TYPE | testNameOrFilePath | message"; for
        # PROCESS-CRASH lines the last two fields are read the other way round.
        crashed = "PROCESS-CRASH" in parts[0]
        location, detail = (parts[2], parts[1]) if crashed else (parts[1], parts[2])

        # Leak failure messages are of the form:
        # leakcheck | .*\d+ bytes leaked (Object-1, Object-2, Object-3, ...)
        leak = LEAK_RE.search(detail)
        if leak:
            first_group = leak.group(1)
            candidate = leak.group(2) if first_group is None else first_group
        else:
            # For reftests, drop the reference path (it is not very unique)
            # and normalise Windows separators.
            location = REFTEST_RE.sub("", location).replace("\\", "/")
            # Marionette paths: keep only up to and including the ".py" file.
            if MARIONETTE_RE.search(location):
                location = location.partition(".py ")[0] + ".py"
            path_end = location
            # Crashes search on the message; everything else searches on the
            # last path component.
            candidate = detail if crashed else location.rpartition("/")[2]

    # If the line was not pipe-delimited, or the term would likely return too
    # many (or irrelevant) results, fall back to the entire failure line when
    # that is itself suitable.
    if not candidate or not is_helpful_search_term(candidate):
        if is_helpful_search_term(error_line):
            candidate = error_line
        else:
            candidate = None
            # Without any usable term, a path_end lacking a "/" is discarded.
            if path_end and "/" not in path_end:
                path_end = None

    # Searching for extremely long search terms is undesirable, since:
    # a) Bugzilla's max summary length is 256 characters, and once
    #    "Intermittent " and platform/suite information is prefixed, there are
    #    even fewer characters left for the failure string we need to match.
    # b) For long search terms, the additional length does little to prevent
    #    false positives, but makes us more susceptible to false negatives due
    #    to run-to-run variances in the error messages (eg paths, process IDs).
    if candidate:
        candidate = re.sub(PREFIX_PATTERN, "", candidate)[:100]

    # For wpt tests we have testname.html?params, so also offer a term for
    # just testname.html. The result is always a list.
    terms = [candidate]
    if candidate and "?" in candidate:
        terms.append(candidate.partition("?")[0])

    return {
        "search_term": terms,
        "path_end": path_end,
    }