in voxpopuli/text/wer_tools.py [0:0]
def get_align_index_path(query: Iterable, target: Iterable) -> List[CharAlignToken]:
    """Map every position of ``query`` to a position in ``target``.

    The two inputs are aligned with ``edlib.align(..., task="path")`` and the
    resulting CIGAR is expanded with ``expand_cigar_format``.  The expanded
    path is then walked once, producing one ``CharAlignToken`` per element of
    ``query``.

    Args:
        query: Sequence to align; it is measured with ``len`` and indexed
            here, so it must be a sized, indexable sequence despite the
            ``Iterable`` annotation.
        target: Sequence ``query`` is aligned against; it is indexed here.

    Returns:
        A list with ``len(query)`` tokens, each built as
        ``CharAlignToken(target_index, operation)``, or an empty list when
        edlib reports no CIGAR (``None``).
    """
    cigar = edlib.align(query, target, task="path")["cigar"]
    if cigar is None:
        return []

    # NOTE(review): presumably one operation character per alignment step
    # (e.g. "==XD"); confirm against expand_cigar_format.
    operations = expand_cigar_format(cigar)

    tokens = []
    target_pos = 0
    op_pos = 0
    for query_pos in range(len(query)):
        # "D" steps consume a target element without consuming a query
        # element, so skip over them before handling the current query item.
        while operations[op_pos] == "D":
            target_pos += 1
            op_pos += 1

        operation = operations[op_pos]
        tokens.append(CharAlignToken(target_pos, operation))

        if operation == "=":
            # Sanity check: a reported match must really be an equal pair.
            assert query[query_pos] == target[target_pos]
            target_pos += 1
        elif operation == "X":
            target_pos += 1
        # Any other operation consumes only the query element, so the
        # target position stays where it is.

        op_pos += 1

    return tokens