def get_longest_prefix()

in probe_scraper/probe_expiry_alert.py [0:0]


def get_longest_prefix(values: List[str], tolerance: int = 0) -> str:
    """
    Return the longest matching prefix among the list of strings.

    Strings are compared position by position. A position belongs to the
    prefix while the number of distinct characters found there does not exceed
    ``min(1 + tolerance, len(values) - 1)``.

    :param values: strings to compare
    :param tolerance: number of additional distinct characters allowed at a
        position. When greater than 0, shorter strings are treated as if they
        were right-padded with spaces to the length of the longest string;
        the padding is used for the comparison only and never appears in the
        returned value.
    :return:
        - ``""`` if ``values`` is empty
        - the only string if ``values`` has a single element
        - the first string, unmodified, if the prefix is shorter than
          4 characters
        - the exact common prefix if ``tolerance`` is 0
        - otherwise the most frequent prefix of the computed length followed
          by ``"*"``; ties are resolved in favour of the prefix seen first
    :raises ValueError: if ``tolerance`` is negative
    """
    if tolerance < 0:
        raise ValueError("tolerance must be >= 0")
    if not values:
        return ""
    if len(values) == 1:
        return values[0]

    if tolerance > 0:
        # Pad only for the comparison so every string contributes to every
        # column; the caller's strings stay untouched so that padding cannot
        # leak into the result.
        longest_value_length = max(len(v) for v in values)
        columns = zip(*(v.ljust(longest_value_length) for v in values))
    else:
        # zip() stops at the shortest string, which bounds an exact prefix.
        columns = zip(*values)

    # Capped at len(values) - 1 so a column where every string differs
    # always ends the prefix, whatever the tolerance.
    max_distinct = min(1 + tolerance, len(values) - 1)

    prefix_length = 0
    for column in columns:
        if len(set(column)) > max_distinct:
            break
        prefix_length += 1

    if prefix_length < 4:
        return values[0]

    if tolerance == 0:
        return values[0][:prefix_length]

    # Slicing the original strings naturally truncates values shorter than
    # prefix_length instead of counting a space-padded variant.
    prefix_count = defaultdict(int)
    for value in values:
        prefix_count[value[:prefix_length]] += 1

    # max() returns the first key with the highest count (insertion order),
    # matching the tie-breaking of a stable descending sort.
    most_common_prefix = max(prefix_count, key=prefix_count.get)
    return most_common_prefix + "*"