in inference/etl.py [0:0]
import pandas as pd
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Must match the padding length used at training time; 100 is an assumed
# placeholder, replace with the actual training-time value.
MAX_SEQUENCE_LENGTH = 100

def fitandtokenize(data):
    # Load the tokenizer fitted during training (SageMaker container path).
    tokenizer = pd.read_pickle("/opt/ml/model/code/tokenizer.pkl")
    # Convert each text into a sequence of integer token ids.
    data = tokenizer.texts_to_sequences(data)
    # Pad/truncate every sequence to a fixed length for the model input.
    data = pad_sequences(data, maxlen=MAX_SEQUENCE_LENGTH)
    return data
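
For context, a minimal sketch of how this helper might be wired into a SageMaker inference script. The input_fn name and signature follow the SageMaker Python SDK convention for custom inference handlers; the JSON payload shape ({"texts": [...]}) is an assumption for illustration, not taken from this repo.

import json

def input_fn(request_body, content_type="application/json"):
    # Assumed payload shape: {"texts": ["first document", "second document"]}
    payload = json.loads(request_body)
    # Tokenize and pad so the array matches the model's expected input shape.
    return fitandtokenize(payload["texts"])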