in app/app.py [0:0]
def call_pipeline(messages: list, language: str):
    """Generate a chat completion using whichever model client is configured.

    Args:
        messages: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts, as expected by HF chat templates / chat pipelines.
        language: Unused in this function; kept for interface compatibility
            with callers (presumably selects a model/client upstream —
            TODO confirm against call sites).

    Returns:
        The newly generated assistant reply as a string.
    """
    if ZERO_GPU:
        # ZeroGPU path: CLIENT is a dict holding a tokenizer and a plain
        # text-generation pipeline, so the chat template must be applied
        # manually before invoking the pipeline.
        tokenizer = CLIENT["tokenizer"]
        formatted_prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            # Append the assistant generation header so the model starts a
            # new reply instead of continuing the last user message. The
            # chat-aware pipeline in the else-branch does this implicitly.
            add_generation_prompt=True,
        )
        response = CLIENT["pipeline"](
            formatted_prompt,
            clean_up_tokenization_spaces=False,
            max_length=2000,
            return_full_text=False,  # return only the generated continuation
        )
        return response[0]["generated_text"]
    else:
        # Chat-aware pipeline path: CLIENT accepts the message list directly
        # and returns the full conversation; the last message is the newly
        # generated assistant turn.
        response = CLIENT(
            messages,
            clean_up_tokenization_spaces=False,
            max_length=2000,
        )
        return response[0]["generated_text"][-1]["content"]