in src/datatrove/pipeline/formatters/symbol_lines_remover.py [0:0]
def format(self, text: str) -> str:
    """Drop lines that consist solely of removable symbols.

    A line is a "symbol line" when it has at least one non-whitespace
    character and every character is either whitespace or contained in
    ``self.symbols_to_remove``. Each run of consecutive symbol lines is
    collapsed: a single ``self.replace_char`` entry is emitted in its place
    when that attribute is truthy, otherwise the run is dropped entirely.
    Empty and whitespace-only lines are kept as-is and terminate a run.

    Args:
        text: The text to clean; it is split with ``str.splitlines``.

    Returns:
        The surviving lines joined with a single newline character. As a
        consequence of split/join, original line terminators are normalised
        and a trailing newline is not preserved.
    """
    # Loop-invariant attribute lookups, read once.
    symbols = self.symbols_to_remove
    replacement = self.replace_char

    kept = []
    in_removed_span = False
    for line in text.splitlines():
        # `strip()` treats every whitespace character as blank, so the
        # per-character test uses `isspace()` as well: a tab-indented
        # separator such as "\t---" is handled like a space-indented one.
        is_symbol_line = bool(line.strip()) and all(
            c in symbols or c.isspace() for c in line
        )
        if not is_symbol_line:
            kept.append(line)
            in_removed_span = False
            continue
        # Only the first line of a run may emit the replacement marker;
        # subsequent symbol lines in the same run are silently dropped.
        if not in_removed_span:
            if replacement:
                kept.append(replacement)
            in_removed_span = True
    return "\n".join(kept)