in src/ie/process_ner_data.py [0:0]
def transform(slots, text):
    """Run ``xfmr.transform`` over ``text`` piece by piece and re-anchor the slots.

    ``text`` is cut at the slot boundaries so that labeled spans and the
    unlabeled text between them are transformed independently. The
    transformed pieces are stripped, joined with single spaces, and each
    labeled piece gets fresh offsets into the joined text.

    Args:
        slots: Iterable of ``"lo:hi:label"`` strings. ``lo`` is a 1-based
            inclusive start and ``hi`` a 1-based exclusive end into ``text``
            (the span is read as ``text[lo - 1:hi - 1]``). Slots are consumed
            in the order given; nothing here sorts them or checks for overlap.
        text: The string the slot offsets refer to.

    Returns:
        ``"<labels>\\t<text>"`` where ``<labels>`` is a comma-separated list of
        ``"lo:hi:label"`` entries using the same 1-based / exclusive-end
        convention against the new text.

    Raises:
        SystemExit: With status 1, after printing diagnostics, if a labeled
            span becomes empty once transformed and stripped.
        IndexError: If a slot has fewer than three ``:``-separated fields.
        ValueError: If a slot's offsets are not integers.
    """
    import sys

    # Materialize once so the slots can still be reported in the diagnostics
    # below even when the caller passed a one-shot iterator.
    slots = list(slots)

    # Phase 1: split the input into (transformed piece, label) pairs, using an
    # empty label for the unlabeled text between / around the slots.
    pieces = []
    piece_labels = []
    previous_hi = 0
    for slot in slots:
        values = slot.split(":")
        lo = int(values[0]) - 1
        hi = int(values[1]) - 1
        label = values[2]
        if lo > previous_hi:
            pieces.append(xfmr.transform(text[previous_hi:lo]))
            piece_labels.append("")
        pieces.append(xfmr.transform(text[lo:hi]))
        piece_labels.append(label)
        previous_hi = hi
    if previous_hi < len(text):
        pieces.append(xfmr.transform(text[previous_hi:]))
        piece_labels.append("")
    pieces = [piece.strip() for piece in pieces]

    # Phase 2: stitch the non-empty pieces back together and recompute offsets.
    out_parts = []
    out_labels = []
    out_len = 0  # length of " ".join(out_parts) so far
    for piece, label in zip(pieces, piece_labels):
        if not piece:
            if not label:
                # Unlabeled text that vanished: nothing to emit.
                continue
            # A labeled span vanished, so its slot cannot be re-anchored.
            print("bad transform:")
            # Show the untouched input, laid out like the return value.
            print("original: " + ",".join(slots) + "\t" + text)
            print("next text: " + " ".join(pieces))
            print("new labels: " + " ".join(piece_labels))
            sys.exit(1)
        # Separate from the previous emitted piece only if there is one;
        # keying on the loop index instead would produce a leading space
        # whenever the first piece was skipped as empty.
        if out_parts:
            out_len += 1
        start = out_len + 1
        out_parts.append(piece)
        out_len += len(piece)
        if label:
            out_labels.append(f"{start}:{out_len + 1}:{label}")
    return ",".join(out_labels) + "\t" + " ".join(out_parts)