def get_span_labels()

in relogic/logickit/utils/utils.py [0:0]


def _entity_type_of_tag(tag):
  """Return the part of `tag` after the first '-', or None if there is no '-'."""
  parts = tag.split('-', 1)
  return parts[1] if len(parts) == 2 else None


def get_span_labels(sentence_tags, is_head=None, segment_id=None, inv_label_mapping=None, ignore_label=None):
  """Go from token-level labels to a set of entities (start, end, class).

  Args:
    sentence_tags: Sequence of tags such as 'B-PER', 'I-PER', 'S-LOC', 'O'.
      When `inv_label_mapping` is given, a sequence of keys into that mapping.
    is_head: Optional sequence of flags aligned with `sentence_tags`. When
      given (and non-empty), only positions whose flag equals 1/True are kept,
      and a kept 'X' tag is replaced by 'O'.
    segment_id: Optional sequence aligned with `is_head`. When omitted, every
      position is treated as belonging to segment 0.
    inv_label_mapping: Optional mapping applied to every element of
      `sentence_tags` before anything else.
    ignore_label: Optional collection of entity classes to drop from the
      result. Defaults to ignoring nothing.

  Returns:
    A tuple `(spans, tags)`. `spans` is a set of `(start, end, class)` tuples
    with inclusive indices into `tags`; `class` is None for a tag that has no
    '-'. `tags` is the tag sequence the spans index into, i.e. after the
    optional mapping and head filtering.
  """
  if ignore_label is None:
    ignore_label = ()
  if inv_label_mapping:
    sentence_tags = [inv_label_mapping[i] for i in sentence_tags]

  if is_head:
    # assert(len(sentence_tags) == len(is_head))
    segments = segment_id if segment_id is not None else [0] * len(is_head)
    # NOTE(review): the original test was `segment == 0 or segment == True`,
    # which also accepts segment 1 because `True == 1`. Preserved as-is;
    # confirm whether only segment 0 was intended.
    head_tags = [
        'O' if sentence_tags[idx] == 'X' else sentence_tags[idx]
        for idx, (head, segment) in enumerate(zip(is_head, segments))
        if head == 1 and (segment == 0 or segment == 1)]
    # An empty filter result falls back to the unfiltered tags.
    if head_tags:
      sentence_tags = head_tags

  span_labels = []
  last = 'O'
  start = -1
  for i, tag in enumerate(sentence_tags):
    pos = None if tag == 'O' else tag.split('-', 1)[0]
    begins_span = pos in ('B', 'S')
    # A new B/S tag or an 'O' closes the span that was open on the previous tag.
    if (begins_span or tag == 'O') and last != 'O':
      span_labels.append((start, i - 1, _entity_type_of_tag(last)))
    if begins_span or last == 'O':
      start = i
    last = tag
  # `last` is the final tag (or still 'O' for empty input), so checking it
  # avoids indexing `sentence_tags[-1]` on an empty sequence.
  if last != 'O':
    span_labels.append((start, len(sentence_tags) - 1, _entity_type_of_tag(last)))

  # Build a new collection rather than removing from `span_labels` while
  # iterating over it.
  filtered_labels = {item for item in span_labels if item[2] not in ignore_label}

  return filtered_labels, sentence_tags