in use-cases/rag-pipeline/embedding-models/multimodal-embedding/src/blip2_server.py [0:0]
def generate_multimodal_embeddings():
    """Generates multimodal embeddings for a given image and caption.

    This endpoint accepts a POST request with either a JSON payload containing
    the `image_uri` and `caption` or a file upload named `image` and a form
    field named `text`.

    Args:
        None (from request body):
            JSON payload:
                image_uri: The GCS URI of the image file.
                caption: The input caption as a string.
            Form data:
                image: The image file.
                text: The input caption as a form field.

    Returns:
        On success, a JSON response whose `multimodal_embeds` key holds the
        `[0][0]` element of the `get_multimodal_embedding` result after
        `.tolist()`. On failure, a `({"error": <message>}, status)` pair:
        400 for a malformed or incomplete request, or for a failure while
        loading the image or computing the embedding; 405 for any method
        other than POST.
    """
    if request.method != "POST":
        return jsonify({"error": "Invalid request method"}), 405

    try:
        if request.is_json:
            try:
                json_req = request.get_json()
            except Exception as e:
                return jsonify({"error": f"Invalid JSON payload: {e}"}), 400
            # Valid JSON is not necessarily an object: `null`, `[]` or a bare
            # string would otherwise hit the key checks below and produce a
            # misleading message or a raw TypeError text.
            if not isinstance(json_req, dict):
                return (
                    jsonify(
                        {"error": "Invalid JSON payload: expected a JSON object"}
                    ),
                    400,
                )
            if "image_uri" not in json_req:
                return jsonify({"error": "No image_uri provided"}), 400
            if "caption" not in json_req:
                return jsonify({"error": "No caption provided"}), 400
            try:
                image = download_image_from_gcs(json_req["image_uri"])
            except Exception as e:
                logger.exception("Failed to load image from image_uri.")
                return jsonify({"error": str(e)}), 400
            caption = json_req["caption"]
        else:
            if "image" not in request.files:
                return jsonify({"error": "No image provided"}), 400
            if "text" not in request.form:
                return jsonify({"error": "No text provided"}), 400
            try:
                image_file = request.files["image"]
                # Normalise to RGB so every upload reaches the model in the
                # same colour mode regardless of the source file.
                image = Image.open(image_file).convert("RGB")
            except Exception as e:
                return jsonify({"error": f"Error processing image file: {e}"}), 400
            caption = request.form["text"]
    except Exception as e:
        # Anything else raised while reading the request is reported as a
        # bad request rather than escaping the handler.
        logger.exception("Failed to read embedding request.")
        return jsonify({"error": str(e)}), 400

    try:
        multimodal_features = get_multimodal_embedding(image, caption)
    except Exception as e:
        logger.exception("Failed to generate multimodal embeddings.")
        # NOTE(review): 400 is kept so existing clients see the same status;
        # confirm whether model-side failures should be reported as 5xx.
        return jsonify({"error": str(e)}), 400

    # Extract the payload before logging so success is only reported once
    # the response value actually exists.
    embedding = multimodal_features.tolist()[0][0]
    logger.info("Multimodal embeddings generated successfully.")
    return jsonify({"multimodal_embeds": embedding})