in code/inference.py [0:0]
def embed_tformer(model, tokenizer, sentences):
    """Embed ``sentences`` with a transformer model followed by mean pooling.

    The sentences are tokenized (padded, truncated to at most 256 tokens,
    returned as PyTorch tensors), moved to the device the model lives on,
    run through ``model`` with gradient tracking disabled, and finally
    reduced with ``mean_pooling`` using the tokenizer's attention mask.

    Args:
        model: Callable invoked as ``model(**encoded_input)``. Presumably a
            Hugging Face style ``torch.nn.Module`` -- confirm against callers.
        tokenizer: Callable invoked on ``sentences``; its result must support
            ``.to(device)``, ``**`` unpacking and an ``'attention_mask'`` key.
        sentences: Input forwarded unchanged to ``tokenizer``.

    Returns:
        Whatever ``mean_pooling`` returns for the model output and the
        attention mask.
    """
    encoded_input = tokenizer(
        sentences,
        padding=True,
        truncation=True,
        max_length=256,
        return_tensors='pt',
    )

    # Place the inputs on the device the model actually occupies. Picking the
    # device purely from CUDA availability breaks when the model was left on
    # the CPU of a CUDA-capable machine, or lives on a non-default GPU.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        # Model exposes no parameters to inspect: keep the original heuristic.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Rebind the result so this also works if ``.to`` returns a new object
    # instead of mutating in place.
    encoded_input = encoded_input.to(device)

    # Compute token embeddings; inference only, so no autograd graph is needed.
    with torch.no_grad():
        model_output = model(**encoded_input)

    sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
    return sentence_embeddings