def preprocess_text()

in blog_example_code/preprocessing.py [0:0]


def preprocess_text(document):
    """Clean a raw document into whitespace-separated word tokens.

    The input is first passed through ``denoise_text`` (defined elsewhere;
    its exact behavior is not visible from this function), coerced to
    ``str``, and then run through a fixed sequence of regex substitutions:

    1. every non-word character (``\\W``) becomes a space;
    2. isolated single ASCII letters surrounded by whitespace are dropped;
    3. a single ASCII letter at the very start of the text is dropped;
    4. runs of whitespace collapse to one space;
    5. a leading ``b`` followed by whitespace is dropped.

    Args:
        document: The text to clean. Whatever ``denoise_text`` returns is
            converted with ``str()`` before the regexes are applied.

    Returns:
        The cleaned string. It is not stripped, so a single leading or
        trailing space may remain (e.g. where punctuation used to be).
    """
    document = denoise_text(document)

    # Replace every non-word character (punctuation, symbols, whitespace)
    # with a space so that only [A-Za-z0-9_] runs remain as tokens.
    document = re.sub(r"\W", " ", str(document))

    # Drop isolated single letters in the middle of the text.
    # NOTE: each match consumes the whitespace on both sides, so of two
    # adjacent single letters ("x a b y") only the first one is removed.
    document = re.sub(r"\s+[a-zA-Z]\s+", " ", document)

    # Drop a single letter at the start of the text. The anchor must be an
    # unescaped ``^``: an escaped ``\^`` would look for a literal caret,
    # which can never be present after the ``\W`` substitution above.
    # Replacing with "" (not " ") avoids introducing a leading space.
    document = re.sub(r"^[a-zA-Z]\s+", "", document)

    # Collapse any run of whitespace into a single space.
    document = re.sub(r"\s+", " ", document)

    # Drop a leading "b" token (presumably left over from the repr of a
    # bytes object such as b'...' -- TODO confirm against the data source).
    document = re.sub(r"^b\s+", "", document)

    return document