# in obelics/processors/pre_extraction_simplificator.py [0:0]
def _format_texts(self, list_nodes):
def format_one_text(text):
if text == "":
return text
text = text.replace("\n", " ")
text = text.replace("\t", " ")
text = re.sub(r"[ ]{2,}", " ", text)
beg_sep = " " == text[0]
end_sep = (" " == text[-1]) and (len(text) > 1)
text = "\n".join([el.strip() for el in text.split("#BR_TAG#")])
text = beg_sep * " " + text + end_sep * " "
return text
for idx, node in enumerate(list_nodes):
list_nodes[idx].text = format_one_text(node.text)
list_nodes = [node for node in list_nodes if (node.tag != "-text") or ((node.tag == "-text") and (node.text))]
return list_nodes