in recipes/data/switchboard/utils.py [0:0]
def normalize_acronyms(line, acronym_dict):
    """Rewrite acronym tokens of a transcript line using ``acronym_dict``.

    Adapted from https://git.io/fjhbu (original author: Minhua Wu).

    The line is split on whitespace and mapped in two passes:

    1. Every ``"i"`` token that has at least one single upper-case letter
       token (``A``-``Z``) directly before or after it is treated as part of
       a spelled-out acronym. The whole contiguous run (letters, the ``"i"``,
       letters) is mapped through ``acronym_dict``.
    2. Every token that is a key of ``acronym_dict`` other than ``"i"`` or
       ``"I"`` is replaced by its mapped value. This pass also sees the
       tokens produced by the first pass.

    Tokens without a mapping are left unchanged, and ``acronym_dict`` does
    not need to contain ``"i"`` or ``"I"``.

    Args:
        line: Transcript text. Leading/trailing whitespace is ignored.
        acronym_dict: Mapping from token to replacement string. Values are
            stripped of surrounding whitespace before use; the mapping
            itself is not modified.

    Returns:
        The mapped tokens joined by single spaces (empty string for an empty
        or whitespace-only line).
    """
    mapping = {key: value.strip() for key, value in acronym_dict.items()}
    # A stand-alone "i"/"I" is an ordinary word, so the second pass must not
    # touch it; only the first pass (acronym context) may map it.
    mapping_no_i = {
        key: value for key, value in mapping.items() if key not in ("i", "I")
    }

    tokens = line.split()
    count = len(tokens)
    is_upper_letter = re.compile(r"^[A-Z]$").match

    # First pass: map "i" when it is part of a spelled-out acronym.
    # Index-based on purpose: tokens are rewritten in place, and later
    # iterations must see the already-mapped neighbours.
    for pos in range(count):
        if tokens[pos] != "i":
            continue

        # Grow the run to the left and right over single upper-case letters.
        start = pos
        while start > 0 and is_upper_letter(tokens[start - 1]):
            start -= 1
        end = pos
        while end + 1 < count and is_upper_letter(tokens[end + 1]):
            end += 1

        if start == end:
            # No neighbouring letters: this "i" is not part of an acronym.
            continue

        for idx in range(start, end + 1):
            tokens[idx] = mapping.get(tokens[idx], tokens[idx])

    # Second pass: map everything else (never "i" / "I").
    return " ".join(mapping_no_i.get(token, token) for token in tokens)