in lambdachecker/sms_spam_classifier_utilities.py [0:0]
def one_hot(text, n,
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
lower=True,
split=' '):
"""One-hot encodes a text into a list of word indexes of size n.
This is a wrapper to the `hashing_trick` function using `hash` as the
hashing function; unicity of word to index mapping non-guaranteed.
# Arguments
text: Input text (string).
n: int. Size of vocabulary.
filters: list (or concatenation) of characters to filter out, such as
punctuation. Default: `!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n`,
includes basic punctuation, tabs, and newlines.
lower: boolean. Whether to set the text to lowercase.
split: str. Separator for word splitting.
# Returns
List of integers in [1, n]. Each integer encodes a word
(unicity non-guaranteed).
"""
return hashing_trick(text, n,
hash_function='md5',
filters=filters,
lower=lower,
split=split)