in code/scripts/translate_and_align.py [0:0]
def align_labels(source_utterances, target_utterances, s2t_indexes, source_idx2labels):
    """Project per-token source labels onto target tokens as B-/I-/O tags.

    For every utterance index ``ix`` a list of ``'O'`` tags, one per entry of
    ``target_utterances[ix]``, is created.  Each source position ``jx`` that is
    both a member of ``s2t_indexes[ix]`` and has ``str(jx)`` in
    ``source_idx2labels[ix]`` writes its label at target position
    ``s2t_indexes[ix][jx]``.  The raw labels are then rewritten in place with
    ``B-``/``I-`` prefixes.

    Args:
        source_utterances: Sequence of source token sequences; only the
            length of each entry is used.
        target_utterances: Sequence of target token sequences; only the
            length of each entry is used.
        s2t_indexes: Per utterance, a lookup from source position (int) to
            target position (int) -- presumably a dict; verify against caller.
        source_idx2labels: Per utterance, a lookup from source position as a
            *string* (``str(jx)``) to a raw label without B-/I- prefix.

    Returns:
        A list with one tag list per source utterance, each as long as the
        corresponding target utterance.

    Raises:
        IndexError: If an aligned target position is >= the length of the
            target utterance (unchanged from the previous behaviour).

    Notes:
        * Negative target positions are skipped.  Previously they were applied
          through Python's negative indexing and silently labelled a token
          counted from the end of the target.
        * When several source tokens map to the same target position, the
          highest source position wins.
        * Two adjacent target tokens carrying the same raw label are always
          merged into a single ``B-``/``I-`` span.
    """
    ret_target_labels = []
    for ix, source_tokens in enumerate(source_utterances):
        # Start with every target token unlabeled.
        template = ['O'] * len(target_utterances[ix])

        # Copy raw labels across the alignment.  The lookups are kept inside
        # the loop so nothing is indexed for utterances without source tokens.
        for jx in range(len(source_tokens)):
            label_key = str(jx)
            if jx not in s2t_indexes[ix] or label_key not in source_idx2labels[ix]:
                continue
            target_pos = s2t_indexes[ix][jx]
            if target_pos < 0:
                # Would wrap around to the end of the template; treat the
                # token as unaligned instead.
                continue
            template[target_pos] = source_idx2labels[ix][label_key]

        # Add B-/I- prefixes.  `state` holds the raw label of the span that is
        # currently open, or 'O' when no span is open.
        state = 'O'
        for pos, label in enumerate(template):
            if label == 'O':
                state = 'O'
            elif label == state:
                template[pos] = 'I-' + label
            else:
                state = label
                template[pos] = 'B-' + label

        ret_target_labels.append(template)
    return ret_target_labels