in probe_scraper/probe_expiry_alert.py [0:0]
def get_longest_prefix(values: List[str], tolerance: int = 0) -> str:
    """
    Return the longest matching prefix among the list of strings.

    Strings are compared character by character, position by position.
    A position still counts as matching while it holds at most
    ``min(1 + tolerance, len(values) - 1)`` distinct characters; the prefix
    ends at the first position that exceeds this limit.

    With ``tolerance == 0`` the comparison stops at the end of the shortest
    string and the common prefix itself is returned. With ``tolerance > 0``
    shorter strings are compared as if padded with spaces to the longest
    length, and the most frequent prefix of the computed length is returned
    with ``"*"`` appended (on a tie, the prefix seen first in ``values`` wins).
    The padding is only used for comparison and never appears in the result.

    Special cases: an empty list returns ``""``, a single-element list returns
    that element, and if the prefix is less than 4 characters the first string
    is returned unchanged.

    :param values: strings to find a common prefix for
    :param tolerance: extra distinct characters allowed per position
    :return: the prefix, the prefix followed by ``"*"``, or the first string
    :raises ValueError: if ``tolerance`` is negative
    """
    if tolerance < 0:
        raise ValueError("tolerance must be >= 0")
    if len(values) == 1:
        return values[0]
    if not values:
        return ""

    # Pad a separate list so every position up to the longest string can be
    # compared; ``values`` itself stays untouched so that padding spaces can
    # never leak into a returned string.
    if tolerance > 0:
        longest_value_length = max(len(v) for v in values)
        compared = [v.ljust(longest_value_length) for v in values]
    else:
        compared = values

    # Capped at len(values) - 1 so that a position where every string
    # differs always terminates the prefix, whatever the tolerance.
    max_distinct = min(1 + tolerance, len(values) - 1)

    prefix_length = 0
    for column in zip(*compared):
        if len(set(column)) > max_distinct:
            break
        prefix_length += 1

    # A very short prefix is not considered useful; use the first string.
    if prefix_length < 4:
        return values[0]

    if tolerance == 0:
        return values[0][:prefix_length]

    # With tolerance the strings may disagree inside the prefix, so pick the
    # variant shared by the most strings.
    prefix_count = defaultdict(int)
    for value in values:
        prefix_count[value[:prefix_length]] += 1

    # max() returns the first maximal key in insertion order, matching the
    # tie-break of a stable descending sort.
    most_common_prefix = max(prefix_count, key=prefix_count.get)
    return most_common_prefix + "*"