in genre/utils.py [0:0]
def get_entity_spans_finalize(input_sentences, output_sentences, redirections=None):
    """Align annotated output sentences with their inputs and extract entity spans.

    Every output sentence is expected to be its input sentence with
    annotations of the form ``{mention}[title]`` inserted.  Input and output
    are walked in lockstep by a three-state scanner:

    * ``"o"`` (outside): the current characters must agree.  A ``(`` or ``)``
      in the output is also accepted for any of ``[]{}`` in the input.  An
      unmatched ``{`` in the output opens a mention.
    * ``"m"`` (mention): between ``{`` and ``}``.  Every matched character
      adds one to the mention length.
    * ``"e"`` (entity): after ``}``.  Only the output is consumed; every
      character except ``[`` is appended to the title until ``]`` closes it.

    In states ``"o"`` and ``"m"`` a space present on only one side is skipped
    (and, inside a mention, not counted towards its length).

    Args:
        input_sentences: Iterable of original sentences (``str``).
        output_sentences: Iterable of annotated sentences (``str``).  The two
            iterables are paired with ``zip``, so surplus items in the longer
            one are ignored.
        redirections: Optional mapping from a title to its replacement.  It
            is consulted after spaces in the title were turned into
            underscores.

    Returns:
        A list with one entry per sentence pair.  Each entry is a list of
        ``(start, length, title)`` tuples: ``start`` is the index in the
        input sentence where the mention begins (non-breaking spaces are
        replaced one-for-one, so the index also fits the unmodified input),
        ``length`` is the number of characters matched inside the mention and
        ``title`` has its spaces replaced by underscores.  Annotations whose
        title is at most one character long or equals ``"NIL"`` are dropped,
        as are annotations that the output never closes.

    Raises:
        RuntimeError: If input and output cannot be aligned in state ``"o"``
            or ``"m"``.
    """
    return_outputs = []
    for input_, output_ in zip(input_sentences, output_sentences):
        # Normalise non-breaking spaces and append a sentinel to both sides so
        # that the input cursor is still in range while an annotation placed
        # after the last input character is being consumed.
        input_ = input_.replace("\xa0", " ") + " -"
        output_ = output_.replace("\xa0", " ") + " -"

        entities = []
        status = "o"
        i = 0  # cursor into input_
        j = 0  # cursor into output_
        while j < len(output_) and i < len(input_):
            in_char = input_[i]
            out_char = output_[j]

            if status == "o":
                if in_char == out_char or (out_char in "()" and in_char in "[]{}"):
                    i += 1
                    j += 1
                elif out_char == " ":
                    j += 1
                elif in_char == " ":
                    i += 1
                elif out_char == "{":
                    # Open a mention: [start, length, title]; it stays a list
                    # while it is being built.
                    entities.append([i, 0, ""])
                    j += 1
                    status = "m"
                else:
                    raise RuntimeError(
                        "cannot align input and output outside a mention: "
                        "input[{}]={!r}, output[{}]={!r}".format(
                            i, in_char, j, out_char
                        )
                    )

            elif status == "m":
                if in_char == out_char:
                    i += 1
                    j += 1
                    entities[-1][1] += 1
                elif out_char == " ":
                    j += 1
                elif in_char == " ":
                    i += 1
                elif out_char == "}":
                    j += 1
                    status = "e"
                else:
                    raise RuntimeError(
                        "cannot align input and output inside a mention: "
                        "input[{}]={!r}, output[{}]={!r}".format(
                            i, in_char, j, out_char
                        )
                    )

            elif status == "e":
                if out_char == "]":
                    start, length, title = entities[-1]
                    title = title.replace(" ", "_")
                    if len(title) <= 1 or title == "NIL":
                        del entities[-1]
                    else:
                        if redirections is not None and title in redirections:
                            title = redirections[title]
                        # Freeze the finished annotation.
                        entities[-1] = (start, length, title)
                    status = "o"
                elif out_char != "[":
                    entities[-1][2] += out_char
                # "[" itself is skipped; every branch consumes one output char.
                j += 1

        if status != "o":
            # The output ended inside an unfinished annotation.  Its title (and
            # possibly its length) would contain the sentinel, so discard it
            # rather than return a half-built list.
            del entities[-1]

        return_outputs.append(entities)

    return return_outputs