gradio_demos/chatbot_demo.py

from huggingface_hub import login
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Log in to Hugging Face Hub (the meta-llama repos are gated, so the account
# must have accepted the corresponding model licenses)
login()

# Determine the device to use (GPU if available, otherwise CPU)
device = 0 if torch.cuda.is_available() else -1

# Dictionary mapping model names to their Hugging Face Hub identifiers
llama_models = {
    "Llama 3 70B Instruct": "meta-llama/Meta-Llama-3-70B-Instruct",
    "Llama 3 8B Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
    "Llama 3.1 70B Instruct": "meta-llama/Llama-3.1-70B-Instruct",
    "Llama 3.1 8B Instruct": "meta-llama/Llama-3.1-8B-Instruct",
    "Llama 3.2 3B Instruct": "meta-llama/Llama-3.2-3B-Instruct",
    "Llama 3.2 1B Instruct": "meta-llama/Llama-3.2-1B-Instruct",
}

# Function to load the model and tokenizer
def load_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
    return generator

# Cache to store loaded models
model_cache = {}

# Function to generate chat responses
def generate_chat(user_input, history, model_choice):
    # Load the model if not already cached
    if model_choice not in model_cache:
        model_cache[model_choice] = load_model(llama_models[model_choice])
    generator = model_cache[model_choice]

    # Initial system prompt
    system_prompt = {"role": "system", "content": "You are a helpful assistant"}

    # Initialize history if it's None or empty (gr.Chatbot starts with an empty list)
    if not history:
        history = [system_prompt]

    # Append user input to history
    history.append({"role": "user", "content": user_input})

    # Generate response using the model; max_new_tokens bounds only the reply,
    # unlike max_length, which also counts the prompt tokens
    response = generator(
        history,
        max_new_tokens=512,
        pad_token_id=generator.tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )[0]["generated_text"][-1]["content"]

    # Append model response to history
    history.append({"role": "assistant", "content": response})

    return history

# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>Chat with Llama Models</center></h1>")

    # Dropdown to select model
    model_choice = gr.Dropdown(list(llama_models.keys()), label="Select Llama Model")

    # Chatbot interface
    chatbot = gr.Chatbot(label="Chatbot Interface", type="messages")

    # Textbox for user input
    txt_input = gr.Textbox(show_label=False, placeholder="Type your message here...")

    # Function to handle user input and generate a response
    def respond(user_input, chat_history, model_choice):
        # Fall back to the first model if none is selected
        if model_choice is None:
            model_choice = list(llama_models.keys())[0]
        updated_history = generate_chat(user_input, chat_history, model_choice)
        return "", updated_history

    # Submit user input on pressing Enter
    txt_input.submit(respond, [txt_input, chatbot, model_choice], [txt_input, chatbot])

    # Button to submit user input
    submit_btn = gr.Button("Submit")
    submit_btn.click(respond, [txt_input, chatbot, model_choice], [txt_input, chatbot])

# Launch the Gradio demo
demo.launch()
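
# Example usage outside the UI (a hypothetical sketch, not part of the demo
# itself): generate_chat can be called directly with a model name taken from
# the llama_models keys above; passing None as history starts a fresh
# conversation seeded with the system prompt.
#
#   history = generate_chat("Hello!", None, "Llama 3.2 1B Instruct")
#   print(history[-1]["content"])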