# get_image_embedding()
#
# Excerpt from use-cases/rag-pipeline/embedding-models/multimodal-embedding/src/blip2_server.py

def get_image_embedding(image):
    """Generates an image embedding for a given image using the BLIP-2 model.

    The image is preprocessed with the model's eval-time visual processor,
    batched, moved to the serving device, and passed through image-only
    feature extraction (no caption is involved).

    Args:
        image: A PIL.Image object.

    Returns:
        A torch.Tensor containing the image embeddings
        (the `image_embeds` field of the model's extracted features).

    Raises:
        ValueError: If there is an error generating the image embedding.
    """
    try:
        # "eval" selects the inference-time transform; unsqueeze(0) adds the
        # batch dimension expected by extract_features.
        image = vis_processors["eval"](image).unsqueeze(0).to(device)
        sample = {"image": image}
        # mode="image" extracts image-only features (no text branch).
        features_image = model.extract_features(sample, mode="image")
        return features_image.image_embeds
    except Exception as e:
        # Broad catch is deliberate: this is a server boundary that converts
        # any failure into a single error type for the caller. Chain the
        # original exception so the real traceback is preserved for debugging.
        raise ValueError(f"Error generating image embedding: {e}") from e