def normalize()

in cc_net/text_normalizer.py [0:0]


def normalize(line: str, accent=True, case=True, numbers=True, punct=1) -> str:
    """Return a normalized copy of *line*.

    Surrounding whitespace is always stripped first; if nothing remains,
    the empty result is returned immediately and no other step runs.
    Otherwise the optional steps below are applied in this fixed order,
    followed by an unconditional ``remove_non_printing_char`` pass.

    Args:
        line: The text to normalize.
        accent: When truthy, pass the text through ``strip_accents``.
        case: When truthy, lowercase the text.
        numbers: When truthy, replace every ``DIGIT_RE`` match with "0".
        punct: ``1`` applies ``replace_unicode_punct``, ``2`` applies
            ``remove_unicode_punct``; any other value leaves punctuation
            untouched.

    Returns:
        The normalized string.
    """
    text = line.strip()
    if not text:
        return text

    # Lowercasing happens before accent stripping, so the accent helper
    # always sees already-lowercased input when both options are enabled.
    text = text.lower() if case else text
    text = strip_accents(text) if accent else text
    text = DIGIT_RE.sub("0", text) if numbers else text

    # The two punctuation modes are mutually exclusive.
    if punct == 2:
        text = remove_unicode_punct(text)
    elif punct == 1:
        text = replace_unicode_punct(text)

    return remove_non_printing_char(text)