def get_multimodal_embeddings()

in use-cases/rag-pipeline/backend/src/generate_embeddings.py [0:0]


def get_multimodal_embeddings(image_uri, desc):
    """
    Fetch multimodal embeddings for an image and its text description.

    Sends a JSON POST request to MULTIMODAL_API_ENDPOINT with the image URI
    and the description (as the "caption" field) and returns the
    "multimodal_embeds" field of the JSON response.

    Args:
        image_uri: The URI of the image.
        desc: The text description of the product from product catalog.

    Returns:
        The value stored under "multimodal_embeds" in the API's JSON response.

    Raises:
        requests.exceptions.HTTPError: If the request fails (connection
            error, timeout, 4xx/5xx status), or if the response body is not
            valid JSON or does not contain the "multimodal_embeds" field.
    """
    # Phase 1: perform the request. Only transport/status failures are
    # handled here so that payload problems are reported separately below.
    try:
        response = requests.post(
            MULTIMODAL_API_ENDPOINT,
            json={"image_uri": image_uri, "caption": desc},
            headers={"Content-Type": "application/json"},
            timeout=100,
        )
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)

    except requests.exceptions.HTTPError as e:
        logger.exception("Error fetching multimodal embedding: %s", e)
        raise

    except requests.exceptions.RequestException as e:
        # Normalize every other requests failure (connection, timeout, ...)
        # to HTTPError so callers only have one exception type to handle.
        logger.exception("Error fetching multimodal embedding: %s", e)
        raise requests.exceptions.HTTPError(
            "Error fetching multimodal embedding", response=requests.Response()
        ) from e

    # Phase 2: decode the payload. ValueError covers JSON decoding failures,
    # TypeError covers a payload that is not subscriptable by a string key
    # (e.g. a JSON list or null), and KeyError covers a JSON object that
    # lacks the expected field.
    try:
        return response.json()["multimodal_embeds"]

    except (ValueError, TypeError, KeyError) as e:
        logger.exception(
            "Not able to decode received json from multimodal embedding API: %s", e
        )
        raise requests.exceptions.HTTPError(
            "Invalid response from multimodal embedding API",
            response=requests.Response(),
        ) from e