def get_error_search_term_and_path()

in treeherder/model/error_summary.py [0:0]


def get_error_search_term_and_path(error_line):
    """
    Derive full-text-search terms (and a test path) from a failure line.

    The line is expected to look like
    ``FAILURE-TYPE | testNameOrFilePath | message``; for ``PROCESS-CRASH``
    lines the last two fields are swapped.  When the line is not in that
    format, or the extracted term is not judged helpful by
    ``is_helpful_search_term``, the whole line is used instead if it is
    helpful itself.

    Returns ``None`` for an empty/falsy ``error_line``.  Otherwise returns a
    dict with two keys:

    * ``"search_term"``: a list holding the search term (prefix stripped and
      capped at 100 characters).  If the term contains ``?`` a second entry
      with everything before the first ``?`` is appended.  When no usable
      term was found the list is ``[None]``.
    * ``"path_end"``: the normalised test name/path, or ``None``.
    """
    if not error_line:
        return None

    # Approach borrowed from
    # https://hg.mozilla.org/webtools/tbpl/file/tip/php/inc/AnnotatedSummaryGenerator.php#l73

    term = None
    path_end = None

    parts = error_line.split(" | ")
    if len(parts) >= 3:
        crashed = "PROCESS-CRASH" in parts[0]
        if crashed:
            # Crash lines carry the message before the test name/path.
            message, test_path = parts[1], parts[2]
        else:
            test_path, message = parts[1], parts[2]

        # Leak messages look like:
        # leakcheck | .*\d+ bytes leaked (Object-1, Object-2, Object-3, ...)
        leak = LEAK_RE.search(message)
        if leak:
            first_group = leak.group(1)
            term = leak.group(2) if first_group is None else first_group
        else:
            # Drop the reftest reference path (it is not very unique) and
            # normalise Windows separators.
            test_path = REFTEST_RE.sub("", test_path).replace("\\", "/")
            # Marionette entries: keep only the part up to the ".py" filename.
            if MARIONETTE_RE.search(test_path):
                test_path = test_path.split(".py ")[0] + ".py"
            path_end = test_path
            # For a path only the final component is of interest; crashes
            # search on the message instead.
            term = message if crashed else test_path.rsplit("/", 1)[-1]

    # No term yet, or one that would match too much / irrelevant results:
    # fall back to the complete failure line when that is suitable.
    if not term or not is_helpful_search_term(term):
        if is_helpful_search_term(error_line):
            term = error_line
        else:
            term = None
            if path_end and "/" not in path_end:
                path_end = None

    # Keep search terms short:
    # a) Bugzilla summaries max out at 256 characters, and "Intermittent "
    #    plus platform/suite information already eats into that budget.
    # b) Extra length barely reduces false positives but increases false
    #    negatives caused by run-to-run variance (paths, process IDs, ...).
    if term:
        term = re.sub(PREFIX_PATTERN, "", term)[:100]

    # wpt tests appear as testname.html?params; also offer the bare
    # testname.html.  The search term is always returned as a list.
    if term and "?" in term:
        terms = [term, term.partition("?")[0]]
    else:
        terms = [term]

    return {
        "search_term": terms,
        "path_end": path_end,
    }