def normalize_acronyms()

in recipes/data/switchboard/utils.py [0:0]


def normalize_acronyms(line, acronym_dict):
    """Rewrite acronym tokens of ``line`` according to ``acronym_dict``.

    The line is split on whitespace and processed in two passes:

    1. Every ``i``/``I`` token that has at least one directly adjacent
       single capital letter (``A``-``Z``) is treated as part of a
       spelled-out acronym: the whole contiguous run of capital letters
       around it, including the pivot itself, is replaced through the
       mapping.
    2. Every remaining token that is a key of the mapping is replaced,
       except ``i`` and ``I``, which are never rewritten on their own.

    Tokens without an entry in the mapping are left unchanged.

    Args:
        line: Whitespace-separated transcript text.
        acronym_dict: Mapping from a token to its replacement string.
            Replacement values are stripped of surrounding whitespace.

    Returns:
        The rewritten tokens joined by single spaces.
    """
    # Taken from https://git.io/fjhbu
    # Original Author - Minhua Wu

    pivots = ("i", "I")
    mapping = {key: value.strip() for key, value in acronym_dict.items()}
    # Mapping without I, i. Filtering (rather than `del`) keeps this working
    # when the caller's dictionary does not contain those keys.
    mapping_no_i = {
        key: value for key, value in mapping.items() if key not in pivots
    }

    def is_capital_letter(token):
        # Same test as the regex ^[A-Z]$ on a whitespace-free token.
        return len(token) == 1 and "A" <= token <= "Z"

    items = line.split()
    count = len(items)

    # First pass mapping to map I as part of acronym.
    # NOTE(review): the pivot used to be the lowercase "i" only, although the
    # neighbours must be capital letters; "I" is accepted as well so that
    # upper-case spelled-out acronyms are handled. Confirm against the
    # casing of the transcripts fed to this function.
    for i in range(count):
        # items is rewritten in place, so always look at the live value.
        if items[i] not in pivots:
            continue

        left = 0
        while i - 1 - left >= 0 and is_capital_letter(items[i - 1 - left]):
            left += 1

        right = 0
        while i + 1 + right < count and is_capital_letter(items[i + 1 + right]):
            right += 1

        if left + right > 0:
            for pos in range(i - left, i + right + 1):
                # Letters that have no mapping are kept as they are.
                items[pos] = mapping.get(items[pos], items[pos])

    # Second pass mapping (not mapping 'i' and 'I')
    return " ".join(mapping_no_i.get(token, token) for token in items)