# generate_chat()
#
# in gradio_demos/chatbot_demo.py [0:0]


def generate_chat(user_input, history, model_choice):
    """Generate a chat response for a Gradio chatbot turn.

    Args:
        user_input: The latest user message.
        history: Running conversation as a list of ``{"role", "content"}``
            dicts, or ``None`` on the first turn.
        model_choice: Key into the module-level ``llama_models`` mapping
            selecting which model to run.

    Returns:
        The updated history list, ending with the assistant's reply.
    """
    # Lazily load and cache the pipeline so repeated calls with the same
    # model choice don't reload the weights.
    if model_choice not in model_cache:
        model_cache[model_choice] = load_model(llama_models[model_choice])
    generator = model_cache[model_choice]

    # Seed a brand-new conversation with the system prompt.
    if history is None:
        history = [{"role": "system", "content": "You are a helpful assistant"}]

    # Append user input to history
    history.append({"role": "user", "content": user_input})

    # FIX: use max_new_tokens instead of max_length. max_length caps
    # prompt + generation combined, so once the accumulated history
    # approached 512 tokens the model had little or no budget left to
    # generate a reply. max_new_tokens caps only the generated portion.
    response = generator(
        history,
        max_new_tokens=512,
        pad_token_id=generator.tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )[-1]["generated_text"][-1]["content"]

    # Append model response to history
    history.append({"role": "assistant", "content": response})

    return history