def embed_tformer()

in code/inference.py [0:0]


def embed_tformer(model, tokenizer, sentences):
    encoded_input = tokenizer(sentences, padding=True, truncation=True, max_length=256, return_tensors='pt')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    encoded_input.to(device)

    #Compute token embeddings
    with torch.no_grad():
        model_output = model(**encoded_input)

    sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
    return sentence_embeddings