def clean_text()

in submission_code/preprocessing.py [0:0]


def clean_text(text):
    """Normalize raw text into a space-joined string of stemmed tokens.

    Steps, in order:
      1. Lowercase the input.
      2. Replace the literal tags ``<b>``, ``</b>``, ``<u>``, ``</u>`` with
         spaces.
      3. Decode the ``&lt;`` / ``&gt;`` entities to ``<`` / ``>``.
      4. Replace every character in ``string.punctuation`` with a space.
      5. Replace runs of digits with a space.
      6. Split on whitespace, drop tokens found in ``stopwords``, and stem
         the remaining tokens with ``stemmer.stem``.

    ``stemmer`` and ``stopwords`` are module-level names defined outside
    this function; the stopword check is applied to the *unstemmed* token.

    Args:
        text: The string to clean.

    Returns:
        The surviving stemmed tokens joined by single spaces (an empty
        string when nothing survives).
    """
    text = text.lower()

    # Only these four tags are stripped explicitly; any other markup is
    # broken apart by the punctuation pass below.
    for tag in ('<b>', '</b>', '<u>', '</u>'):
        text = text.replace(tag, ' ')

    # Decode the entities *before* the punctuation pass; otherwise '&' and
    # ';' would be blanked and leave stray 'lt' / 'gt' tokens behind.
    text = text.replace('&lt;', '<').replace('&gt;', '>')

    # One translate() pass instead of one replace() per punctuation char.
    punctuation_to_space = str.maketrans(
        string.punctuation, ' ' * len(string.punctuation)
    )
    text = text.translate(punctuation_to_space)

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    text = re.sub(r'\d+', ' ', text)

    # str.split() with no argument already splits on runs of whitespace,
    # so no separate whitespace-collapsing substitution is needed.
    tokens = text.split()
    return ' '.join(
        stemmer.stem(token) for token in tokens if token not in stopwords
    )