in decontamination/decontaminate.py [0:0]
def tokenize(text):
    """Strip diacritics from *text*, lowercase it, and split it into word tokens.

    The text is decomposed with Unicode NFD normalization so that accented
    characters become a base character followed by combining marks; every
    character whose Unicode category is ``Mn`` (nonspacing mark) is then
    dropped.  The remaining text is lowercased and every maximal run of
    ``\\w`` characters (letters, digits and underscore) becomes one token.

    Args:
        text: The string to tokenize.

    Returns:
        A list of lowercase token strings in order of appearance; empty if
        *text* contains no word characters.
    """
    decomposed = unicodedata.normalize("NFD", text)
    # Removing the nonspacing marks left behind by NFD is what strips accents.
    stripped = "".join(
        ch for ch in decomposed if unicodedata.category(ch) != "Mn"
    )
    # Raw string: "\w" in a normal literal is an invalid escape sequence and
    # triggers a SyntaxWarning on recent Python versions.
    return re.findall(r"\w+", stripped.lower())