in route.py
from vllm import SamplingParams

def generate_response(self, prompts, max_tokens=1024, temperature=0.01, top_p=0.5):
    # Near-greedy decoding; stop generation at the tokenizer's EOS token.
    sampling_params = SamplingParams(temperature=temperature, top_p=top_p, max_tokens=max_tokens,
                                     skip_special_tokens=True, stop=self.tokenizer.eos_token)
    if self.tag == 'mistral':
        # Mistral chat templates reject a separate system role, so send everything as one user turn.
        messages_list = [[{"role": "user", "content": p}] for p in prompts]
    else:
        messages_list = [[{"role": "system", "content": "You are a helpful SQLite assistant."},
                          {"role": "user", "content": p}] for p in prompts]
    # Render each conversation to a prompt string via the model's chat template.
    messages_list = self.tokenizer.apply_chat_template(messages_list, add_generation_prompt=True, tokenize=False)
    outputs = self.llm.generate(messages_list, sampling_params)
    # vLLM returns one RequestOutput per prompt; take the single completion of each.
    return [output.outputs[0].text for output in outputs]
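
For context, here is a minimal sketch of how the surrounding class might be wired up. The class name, constructor, and model path below are illustrative assumptions, not taken from route.py; only self.llm (a vllm.LLM), self.tokenizer, and self.tag are implied by the method body.

from vllm import LLM
from transformers import AutoTokenizer

class SQLRouter:  # hypothetical wrapper class; name is an assumption
    def __init__(self, model_path, tag):
        self.tag = tag                                            # e.g. 'mistral' toggles the no-system-role branch
        self.llm = LLM(model=model_path)                          # vLLM engine used by generate_response
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)

    generate_response = generate_response  # bind the module-level function above as a method

if __name__ == "__main__":
    router = SQLRouter("mistralai/Mistral-7B-Instruct-v0.2", tag="mistral")
    print(router.generate_response(["List all tables in the schema."])[0])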