in main.py [0:0]
def get_slot_values_BIO(tokens, labels):
    """Group BIO-tagged tokens into slot values.

    Every label is expected to be ``'O'``, ``'B-<type>'`` or ``'I-<type>'``.
    A ``B-`` label opens a new span of ``<type>``.  An ``I-`` label extends
    the currently open span when its type matches; an ``I-`` label that
    cannot continue the open span (at the start of the sequence, after an
    ``'O'``, or after a span of a different type) is treated as ``B-`` and
    opens a new span instead of being rejected.

    Args:
        tokens: Sequence of token strings.
        labels: Sequence of BIO label strings, one per token.

    Returns:
        A dict mapping each slot type to the list of its values, in order of
        appearance.  Each value is the span's tokens joined by single spaces.

    Raises:
        AssertionError: If ``tokens`` and ``labels`` differ in length, or a
            label is neither ``'O'`` nor prefixed with ``'B-'`` / ``'I-'``.
    """
    # Raised explicitly (rather than via ``assert``) so the validation still
    # runs under ``python -O``; the exception type is unchanged for callers.
    if len(tokens) != len(labels):
        raise AssertionError(f'tokens:{tokens}\nlabels:{labels}')

    slots = {}
    span_type = None  # slot type of the open span, or None when outside one
    span_tokens = []  # tokens collected for the open span

    for token, label in zip(tokens, labels):
        if label == 'O':
            opens_span = False
        elif label.startswith('B-'):
            opens_span = True
        elif label.startswith('I-'):
            if label[2:] == span_type:
                # Genuine continuation of the open span.
                span_tokens.append(token)
                continue
            # Dangling I- (no open span of this type): treat it as B-.
            opens_span = True
        else:
            raise AssertionError(f'invalid BIO label: {label!r}')

        # Reaching here means the open span (if any) has ended.
        if span_type is not None:
            slots.setdefault(span_type, []).append(' '.join(span_tokens))

        if opens_span:
            span_type = label[2:]
            span_tokens = [token]
        else:
            span_type = None
            span_tokens = []

    # Flush a span that runs to the end of the sequence.
    if span_type is not None:
        slots.setdefault(span_type, []).append(' '.join(span_tokens))
    return slots