in workshop/preprocessing.py [0:0]
def preprocess_text(document):
    """Normalize a raw document into space-separated word tokens.

    The input is first passed through ``denoise_text`` (defined elsewhere in
    this module; its exact behavior is not visible here), coerced to ``str``,
    and then cleaned with a fixed sequence of regex substitutions.

    Args:
        document: The raw text. Any object is accepted; it is converted with
            ``str()`` after denoising.

    Returns:
        The cleaned string. Leading/trailing single spaces may remain, since
        the function collapses whitespace but does not strip it.
    """
    document = denoise_text(document)

    # Replace every non-word character (anything outside [a-zA-Z0-9_] and
    # Unicode word characters) with a space.
    document = re.sub(r"\W", " ", str(document))

    # Drop single letters that stand alone between whitespace.
    # NOTE: matches consume their surrounding whitespace, so in a run of
    # adjacent single letters ("a b c") every other one survives this pass.
    document = re.sub(r"\s+[a-zA-Z]\s+", " ", document)

    # Drop a single letter at the very start of the text. The anchor must be
    # an unescaped ``^``: the escaped form ``\^`` looks for a literal caret,
    # which can never be present after the non-word replacement above.
    # Replacing with "" avoids introducing a leading space that would stop
    # the "prefixed b" rule below from matching.
    document = re.sub(r"^[a-zA-Z]\s+", "", document)

    # Collapse any run of whitespace into a single space.
    document = re.sub(r"\s+", " ", document)

    # Remove a leading standalone "b" — presumably the residue of a bytes
    # literal repr such as b'...'; confirm against callers.
    document = re.sub(r"^b\s+", "", document)

    return document