in relogic/logickit/utils/utils.py [0:0]
def _span_class_from_tag(tag):
    """Return the text after the first '-' in *tag*, or None if there is no '-'."""
    _, dash, span_class = tag.partition('-')
    return span_class if dash else None


def get_span_labels(sentence_tags, is_head=None, segment_id=None, inv_label_mapping=None, ignore_label=()):
    """Go from token-level labels to a set of entity spans (start, end, class).

    Args:
        sentence_tags: Per-token tags. Either tag strings such as 'O',
            'B-PER', 'I-PER', 'S-LOC' or 'X', or, when `inv_label_mapping`
            is given, keys that are looked up in that mapping.
        is_head: Optional per-token flags. When given (and non-empty), only
            tokens whose flag equals 1/True are kept; a kept token tagged
            'X' is rewritten to 'O'.
        segment_id: Optional per-token segment ids, paired position by
            position with `is_head`. A token is kept only if its segment id
            equals 0 or 1. When omitted, no segment filtering is applied.
        inv_label_mapping: Optional mapping from tag key to tag string.
        ignore_label: Container of span classes to drop from the result.
            None is treated as empty.

    Returns:
        A tuple `(spans, tags)`. `spans` is a set of `(start, end, class)`
        tuples with inclusive indices into `tags`; `class` is the text after
        the first '-' of the last tag of the span, or None if that tag has
        no '-'. `tags` is the tag sequence the indices refer to, i.e. after
        the optional mapping and head filtering. If head filtering keeps no
        token at all, the unfiltered tags are used instead.
    """
    if inv_label_mapping:
        sentence_tags = [inv_label_mapping[i] for i in sentence_tags]

    if is_head:
        # Without segment ids every token counts as segment 0, so only the
        # head flag decides (previously this case crashed inside zip()).
        segments = segment_id if segment_id is not None else [0] * len(is_head)
        kept_tags = []
        for idx, (head, segment) in enumerate(zip(is_head, segments)):
            # NOTE(review): segment ids 0 *and* 1 both pass this test (the
            # original compared against `True`, which equals 1) - confirm
            # that keeping segment 1 is intended.
            if head == 1 and (segment == 0 or segment == 1):
                tag = sentence_tags[idx]
                kept_tags.append('O' if tag == 'X' else tag)
        if kept_tags:
            sentence_tags = kept_tags

    span_labels = []
    last = 'O'
    start = -1
    for i, tag in enumerate(sentence_tags):
        # Prefix is the part before the first '-', or None for an 'O' tag.
        prefix = None if tag == 'O' else tag.partition('-')[0]
        starts_new_span = prefix in ('B', 'S')
        # A running span ends when a new one begins or an 'O' is reached.
        if last != 'O' and (starts_new_span or tag == 'O'):
            span_labels.append((start, i - 1, _span_class_from_tag(last)))
        if starts_new_span or last == 'O':
            start = i
        last = tag
    # `last` is the final tag (or still 'O' for empty input), so this closes
    # a span that runs to the end without indexing into an empty sequence.
    if last != 'O':
        span_labels.append((start, len(sentence_tags) - 1, _span_class_from_tag(last)))

    # Build a new collection instead of removing from `span_labels` while
    # iterating over it, which would skip elements.
    ignored = ignore_label if ignore_label is not None else ()
    return {span for span in span_labels if span[2] not in ignored}, sentence_tags