in use-cases/rag-pipeline/embedding-models/multimodal-embedding/src/blip2_server.py [0:0]
def get_image_embedding(image):
"""Generates multimodal embeddings for a given image and caption.
Args:
image: A PIL.Image object.
caption: The input caption as a string.
Returns:
A torch.Tensor containing the multimodal embeddings.
Raises:
ValueError: If there is an error generating the multimodal embedding.
"""
    try:
        # Preprocess the image, add a batch dimension, and move it to the target device.
        image = vis_processors["eval"](image).unsqueeze(0).to(device)
        sample = {"image": image}
        # Extract image-only features (mode="image") with the BLIP-2 feature extractor.
        features_image = model.extract_features(sample, mode="image")
        return features_image.image_embeds
    except Exception as e:
        raise ValueError(f"Error generating image embedding: {e}") from e
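
# A minimal usage sketch (not part of the original file): it assumes the
# module-level globals `model`, `vis_processors`, and `device` used above were
# created with LAVIS's load_model_and_preprocess for a BLIP-2 feature
# extractor, e.g.:
#
#     import torch
#     from PIL import Image
#     from lavis.models import load_model_and_preprocess
#
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     model, vis_processors, _ = load_model_and_preprocess(
#         name="blip2_feature_extractor",
#         model_type="pretrain",
#         is_eval=True,
#         device=device,
#     )
#
#     # "example.jpg" is a hypothetical input file.
#     embedding = get_image_embedding(Image.open("example.jpg").convert("RGB"))
#     print(embedding.shape)  # e.g. torch.Size([1, 32, 768]) for the pretrain checkpoint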