in backends/python/server/text_embeddings_server/models/masked_model.py
def embed(self, batch: PaddedBatch) -> List[Embedding]:
    kwargs = {"input_ids": batch.input_ids, "attention_mask": batch.attention_mask}
    # Some architectures (e.g. DistilBERT) accept no token_type_ids or
    # position_ids, so only forward them when the model supports them.
    if self.has_token_type_ids:
        kwargs["token_type_ids"] = batch.token_type_ids
    if self.has_position_ids:
        kwargs["position_ids"] = batch.position_ids
    output = self.model(**kwargs)
    # Pool the token-level hidden states into one vector per sequence,
    # using the attention mask to ignore padding positions.
    embedding = self.pooling.forward(output, batch.attention_mask)
    # Flatten to a single Python list, then slice it back into one
    # embedding of `step_size` (the hidden size) values per sequence.
    cpu_results = embedding.view(-1).tolist()
    step_size = embedding.shape[-1]
    return [
        Embedding(values=cpu_results[i * step_size : (i + 1) * step_size])
        for i in range(len(batch))
    ]
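For reference, a minimal sketch of the kind of mean pooling that `self.pooling.forward` plausibly performs. This is an assumption, not the repo's actual `Pooling` implementation: the real class receives the full model output object and may support other strategies (e.g. CLS pooling), while `mean_pool` here takes the hidden-state tensor directly and its name is illustrative.

import torch

def mean_pool(hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # hidden_states: [batch, seq_len, hidden]; attention_mask: [batch, seq_len]
    mask = attention_mask.unsqueeze(-1).to(hidden_states.dtype)  # [batch, seq_len, 1]
    summed = (hidden_states * mask).sum(dim=1)                   # zero out padding, sum real tokens
    counts = mask.sum(dim=1).clamp(min=1e-9)                     # real-token count per sequence
    return summed / counts                                       # [batch, hidden]

Given such a [batch, hidden] result, the flatten-and-slice at the end of embed is simply an explicit way of iterating over the tensor's rows, yielding one Embedding per input sequence.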