app.py (69 lines of code) (raw):

"""Flask front end for a Gemini chat session with optional image uploads.

Request flow: the browser POSTs an optional image to ``/upload`` and the
user's text to ``/chat``; both are parked in module-level variables. A
following GET to ``/stream`` sends the parked input to the model and relays
the reply as Server-Sent Events.
"""

import io
import os

from dotenv import load_dotenv
from flask import (
    Flask,
    Response,
    jsonify,
    render_template,
    request,
    stream_with_context,
)
from google import genai
from PIL import Image
from werkzeug.utils import secure_filename

# Load environment variables from .env file
load_dotenv()

ALLOWED_EXTENSIONS = {"png", "jpg", "jpeg"}

client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
chat_session = client.chats.create(model="gemini-2.0-flash")

app = Flask(__name__, static_folder="static", template_folder="templates")

# Input parked by /chat and /upload until the next /stream request consumes
# it. NOTE: this is process-wide state shared by every client of the app.
next_message = ""
next_image = None  # PIL image awaiting the next /stream call, or None


def allowed_file(filename: str) -> bool:
    """Return True if *filename* has one of the ALLOWED_EXTENSIONS.

    The comparison is case-insensitive and looks only at the final extension.
    """
    _, ext = os.path.splitext(filename)
    return ext.lstrip(".").lower() in ALLOWED_EXTENSIONS


@app.route("/upload", methods=["POST"])
def upload_file():
    """Validate an uploaded image and park it for the next /stream request.

    Expects a multipart form field named ``file``. Always answers with a JSON
    object carrying ``success`` and ``message``; on success it also carries
    the sanitised ``filename``.
    """
    global next_image

    if "file" not in request.files:
        return jsonify(success=False, message="No file part")

    file = request.files["file"]
    # Browsers submit an empty filename when no file was chosen.
    if not file.filename:
        return jsonify(success=False, message="No selected file")

    if not allowed_file(file.filename):
        return jsonify(success=False, message="File type not allowed")

    try:
        # Copy the upload into memory so the image does not depend on the
        # request's file stream after this handler returns.
        image = Image.open(io.BytesIO(file.read()))
    except OSError:
        # Pillow raises UnidentifiedImageError (an OSError subclass) when the
        # bytes are not an image it can identify, whatever the extension says.
        return jsonify(success=False, message="File is not a valid image")

    next_image = image
    return jsonify(
        success=True,
        message="File uploaded successfully and added to the conversation",
        filename=secure_filename(file.filename),
    )


@app.route("/", methods=["GET"])
def index():
    """Render the main page, passing the chat session's history to it."""
    return render_template("index.html", chat_history=chat_session.get_history())


@app.route("/chat", methods=["POST"])
def chat():
    """Park the user's message for the next /stream request.

    Expects a JSON object with a string ``message`` field. Responds with
    ``{"success": true}``, or with HTTP 400 and ``success: false`` when the
    body is not JSON or has no string ``message``.
    """
    global next_message

    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every bad request takes the same explicit error path.
    payload = request.get_json(silent=True)
    message = payload.get("message") if isinstance(payload, dict) else None
    if not isinstance(message, str):
        return jsonify(success=False, message="No message provided"), 400

    next_message = message
    return jsonify(success=True)


def _sse_event(text: str) -> str:
    """Frame *text* as one Server-Sent Event.

    Each line of the payload needs its own ``data:`` prefix; the client joins
    them back with newlines. A bare newline inside a single ``data:`` field
    would otherwise truncate the event or terminate it early.
    """
    lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
    return "".join(f"data: {line}\n" for line in lines) + "\n"


@app.route("/stream", methods=["GET"])
def stream():
    """Stream the model's reply to the parked message (and image, if any).

    The reply is sent as Server-Sent Events, one event per response chunk.
    Handles both multi-modal (text + image) and plain-text requests.
    """

    def generate():
        global next_message, next_image

        # Claim the parked input up front so a later request cannot resend it.
        message, image = next_message, next_image
        next_message, next_image = "", None

        # Nothing parked (e.g. /stream requested without a preceding /chat):
        # end the stream without calling the model.
        if not message and image is None:
            return

        contents = message if image is None else [message, image]
        for chunk in chat_session.send_message_stream(contents):
            text = chunk.text
            # A chunk may carry no text (text is None); skip it rather than
            # failing on it. TODO confirm against the google-genai docs which
            # chunks do this.
            if text is None:
                continue
            yield _sse_event(text)

    return Response(stream_with_context(generate()), mimetype="text/event-stream")