def _format_texts()

in obelics/processors/pre_extraction_simplificator.py [0:0]


    def _format_texts(self, list_nodes):
        def format_one_text(text):
            if text == "":
                return text
            text = text.replace("\n", " ")
            text = text.replace("\t", " ")
            text = re.sub(r"[ ]{2,}", " ", text)
            beg_sep = " " == text[0]
            end_sep = (" " == text[-1]) and (len(text) > 1)
            text = "\n".join([el.strip() for el in text.split("#BR_TAG#")])
            text = beg_sep * " " + text + end_sep * " "
            return text

        for idx, node in enumerate(list_nodes):
            list_nodes[idx].text = format_one_text(node.text)
        list_nodes = [node for node in list_nodes if (node.tag != "-text") or ((node.tag == "-text") and (node.text))]
        return list_nodes