in clip/simple_tokenizer.py [0:0]
def whitespace_clean(text):
    """Collapse whitespace runs in *text* and trim both ends.

    Every maximal run of characters matched by the regex ``\\s+`` is replaced
    with a single space, after which leading and trailing whitespace is
    stripped from the result.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    return re.sub(r'\s+', ' ', text).strip()