in cc_net/text_normalizer.py [0:0]
def normalize(line: str, accent=True, case=True, numbers=True, punct=1) -> str:
line = line.strip()
if not line:
return line
if case:
line = line.lower()
if accent:
line = strip_accents(line)
if numbers:
line = DIGIT_RE.sub("0", line)
if punct == 1:
line = replace_unicode_punct(line)
elif punct == 2:
line = remove_unicode_punct(line)
line = remove_non_printing_char(line)
return line