# inference.py
def predict_fn(input_object, model):
    """Run inference: embed ``input_object`` with the loaded transformer.

    Args:
        input_object: Input forwarded to ``embed_tformer`` (presumably raw
            text / sentences — TODO confirm against the input handler).
        model: Dict with ``'model'`` and ``'tokenizer'`` entries, as produced
            by the model-loading function.

    Returns:
        list: The first sentence embedding, converted via ``.tolist()``.
    """
    logger.info("Calling model")
    # perf_counter() is monotonic and high-resolution; time.time() is
    # wall-clock and can jump, which skews duration measurements.
    start_time = time.perf_counter()
    sentence_embeddings = embed_tformer(model['model'], model['tokenizer'], input_object)
    # Report timing through the module logger (lazy %-args) instead of
    # print(), so it lands in the same stream/format as the other log lines.
    logger.info("--- Inference time: %s seconds ---", time.perf_counter() - start_time)
    response = sentence_embeddings[0].tolist()
    return response