in pytext/utils/data.py [0:0]
def merge_token_labels_by_label(token_ranges, labels):
    """Merge runs of adjacent tokens that share a label into slot summaries.

    Labels are compared verbatim (they carry no BIO prefix). Consecutive
    tokens with the same label are collapsed into one span running from the
    first token's begin to the last token's end. Tokens labelled
    ``Slot.NO_LABEL_SLOT`` never produce a span and break any run around
    them.

    Args:
        token_ranges: Indexable collection of ``(begin, end)`` pairs, one per
            token, aligned with ``labels`` (presumably offsets into the
            utterance -- confirm against the caller).
        labels: Sequence of label strings, one per token.

    Returns:
        A list of ``"begin:end:label"`` strings, in token order. Empty when
        ``labels`` is empty.

    Raises:
        IndexError: May be raised when ``token_ranges`` has fewer entries
            than ``labels``.
    """
    # Nothing to merge; also avoids indexing token_ranges[0] / labels[-1]
    # on empty input, which used to raise IndexError.
    if not labels:
        return []

    no_label = Slot.NO_LABEL_SLOT
    summary_list = []
    begin = token_ranges[0][0]
    end = token_ranges[0][1]

    for i in range(1, len(labels)):
        prev_label = labels[i - 1]
        label = labels[i]

        if label == prev_label:
            # Same label as the previous token: extend the open span, unless
            # we are inside a run of unlabelled tokens.
            if label != no_label:
                end = token_ranges[i][1]
            continue

        # Label changed: close the previous span if it was a real slot...
        if prev_label != no_label:
            summary_list.append(":".join([str(begin), str(end), prev_label]))
        # ...and open a new span if the current token is a real slot.
        if label != no_label:
            begin = token_ranges[i][0]
            end = token_ranges[i][1]

    # The loop only closes a span when the label changes, so the span that
    # reaches the last token has to be flushed here.
    if labels[-1] != no_label:
        summary_list.append(":".join([str(begin), str(end), labels[-1]]))
    return summary_list