in torchmoji/filter_utils.py [0:0]
def shorten_word(word):
    """ Shorten groupings of 3+ identical consecutive chars to 2, e.g. '!!!!' --> '!!'

    Only pure-ASCII words are shortened. A word containing non-ASCII
    characters, or an object that has no ``encode`` method (e.g. ``None`` or
    Python 3 ``bytes``), is returned unchanged.

    Args:
        word: The word to shorten.

    Returns:
        The word with every run of 3 or more identical consecutive
        characters collapsed to exactly 2 of that character; runs of
        length 1 or 2 are kept as they are.
    """
    # only shorten ASCII words. `encode` is used for the probe because text
    # strings have it on both Python 2 and Python 3, whereas `decode` does
    # not exist on Python 3 `str` (which made every word look non-ASCII).
    try:
        word.encode('ascii')
    except (UnicodeDecodeError, UnicodeEncodeError, AttributeError):
        return word

    # must have at least 3 char to be shortened
    if len(word) < 3:
        return word

    # Rebuild the word run by run instead of using str.replace on the whole
    # word: replace() also matches inside *other*, longer runs of the same
    # character (e.g. 'aaabaaaa' used to become 'aabaaa').
    pieces = []
    for char, run in groupby(word):
        run = ''.join(run)
        pieces.append(char * 2 if len(run) >= 3 else run)
    return ''.join(pieces)