def fitandtokenize(data)

in inference/etl.py

Loads the tokenizer that was fit at training time from the SageMaker model directory, converts each input text into a sequence of integer token ids, and pads every sequence to MAX_SEQUENCE_LENGTH.

import pandas as pd
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Assumed module-level constant; the real value is defined elsewhere in
# etl.py and must match the sequence length the model was trained with.
MAX_SEQUENCE_LENGTH = 100

def fitandtokenize(data):
    # Load the tokenizer fit at training time from the SageMaker container path.
    tokenizer = pd.read_pickle("/opt/ml/model/code/tokenizer.pkl")
    # Convert each text into an array of integer token ids.
    data = tokenizer.texts_to_sequences(data)
    # Pad (or truncate) every sequence to a fixed length for the model input.
    data = pad_sequences(data, maxlen=MAX_SEQUENCE_LENGTH)
    return data
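
A minimal usage sketch, assuming the tokenizer pickle exists at the container path above; the example texts are illustrative only:

texts = ["the service responded quickly", "checkout failed twice"]
padded = fitandtokenize(texts)
# padded is a 2-D numpy array of shape (len(texts), MAX_SEQUENCE_LENGTH),
# ready to pass to the model's predict() call.
print(padded.shape)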