def get_align_index_path()

in voxpopuli/text/wer_tools.py [0:0]


def get_align_index_path(query: Iterable, target: Iterable) -> List[CharAlignToken]:
    """Pair each element of ``query`` with a target index and an edit operation.

    The alignment is obtained from ``edlib.align(..., task="path")`` and its
    CIGAR result is passed through ``expand_cigar_format`` (presumably yielding
    one operation character per alignment step -- confirm against that helper).

    Args:
        query: Sequence being aligned; one output token is produced for each
            of its elements.
        target: Sequence that ``query`` is aligned against.

    Returns:
        A list with one ``CharAlignToken(target_index, operation)`` per element
        of ``query``, or an empty list when edlib reports no CIGAR string.
    """
    cigar = edlib.align(query, target, task="path")["cigar"]
    if cigar is None:
        return []
    ops = expand_cigar_format(cigar)

    tokens = []
    target_pos = 0
    op_pos = 0
    for query_item in query:
        # "D" steps move along the target without using up a query element,
        # so step over them before handling the current query element.
        while ops[op_pos] == "D":
            target_pos += 1
            op_pos += 1

        op = ops[op_pos]
        op_pos += 1
        tokens.append(CharAlignToken(target_pos, op))

        if op == "=":
            # Sanity check: a reported match must pair equal elements.
            assert query_item == target[target_pos]
        # Only "=" and "X" advance the target; any other operation leaves
        # the target position where it is.
        if op in ("=", "X"):
            target_pos += 1

    return tokens