# LLM/openai_api_language_model.py (90 lines of code) (raw):
import logging
import time
from nltk import sent_tokenize
from rich.console import Console
from openai import OpenAI
from baseHandler import BaseHandler
from LLM.chat import Chat
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Rich console instance.
# NOTE(review): not referenced in the visible part of this file - confirm
# whether it is still needed.
console = Console()
# Language code -> language name used when building the
# "Please reply to my message in <language>." instruction in
# OpenApiModelHandler.process.
WHISPER_LANGUAGE_TO_LLM_LANGUAGE = dict(
    en="english",
    fr="french",
    es="spanish",
    zh="chinese",
    ja="japanese",
    ko="korean",
)
class OpenApiModelHandler(BaseHandler):
    """
    Handles the language model part.

    Sends each incoming prompt to an OpenAI-compatible chat-completions
    endpoint and yields the reply, either in one piece or sentence by
    sentence when streaming is enabled.
    """

    def setup(
        self,
        model_name="deepseek-chat",
        device="cuda",
        gen_kwargs=None,
        base_url=None,
        api_key=None,
        stream=False,
        user_role="user",
        chat_size=1,
        init_chat_role="system",
        init_chat_prompt="You are a helpful AI assistant.",
    ):
        """
        Configure the handler, create the API client and run a warm-up call.

        Args:
            model_name: Model identifier passed to the completions endpoint.
            device: Accepted for interface compatibility; not used here.
            gen_kwargs: Accepted for interface compatibility; not used here.
            base_url: Base URL handed to the ``OpenAI`` client.
            api_key: API key handed to the ``OpenAI`` client.
            stream: When true, replies are requested as a stream and
                ``process`` yields them sentence by sentence.
            user_role: Role name used for user messages.
            chat_size: Size argument forwarded to ``Chat``.
            init_chat_role: Role of the initial chat message; a falsy value
                skips the initial message.
            init_chat_prompt: Content of the initial chat message.

        Raises:
            ValueError: If ``init_chat_role`` is set but ``init_chat_prompt``
                is empty.
        """
        self.model_name = model_name
        self.stream = stream
        self.chat = Chat(chat_size)
        if init_chat_role:
            if not init_chat_prompt:
                raise ValueError(
                    "An initial prompt needs to be specified when setting init_chat_role."
                )
            self.chat.init_chat({"role": init_chat_role, "content": init_chat_prompt})
        self.user_role = user_role
        self.client = OpenAI(api_key=api_key, base_url=base_url)
        self.warmup()

    def warmup(self):
        """Issue one small request so the first real call does not pay start-up cost."""
        logger.info(f"Warming up {self.__class__.__name__}")
        start = time.time()
        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=[
                {"role": "system", "content": "You are a helpful assistant"},
                {"role": "user", "content": "Hello"},
            ],
            stream=self.stream,
        )
        if self.stream:
            # A streamed reply is delivered incrementally while iterating;
            # drain it so the measured time covers the whole reply and the
            # response is fully consumed instead of being left open.
            for _ in response:
                pass
        end = time.time()
        logger.info(
            f"{self.__class__.__name__}: warmed up! time: {(end - start):.3f} s"
        )

    def process(self, prompt):
        """
        Send ``prompt`` to the model and yield the reply.

        Args:
            prompt: Either the user text, or a ``(text, language_code)``
                tuple. A language code ending in ``"-auto"`` has that suffix
                removed and, when the language is known, an instruction to
                reply in that language is prepended to the text.

        Yields:
            ``(text, language_code)`` tuples: one per completed sentence plus
            the trailing remainder when streaming, otherwise a single tuple
            holding the whole reply.
        """
        logger.debug("call api language model...")

        language_code = None
        if isinstance(prompt, tuple):
            prompt, language_code = prompt
            if isinstance(language_code, str) and language_code.endswith("-auto"):
                language_code = language_code[: -len("-auto")]
                language_name = WHISPER_LANGUAGE_TO_LLM_LANGUAGE.get(language_code)
                if language_name is None:
                    # Unknown language: send the prompt unchanged rather than
                    # failing the whole request with a KeyError.
                    logger.warning(
                        "No language name known for code %r; sending the prompt "
                        "without a language instruction.",
                        language_code,
                    )
                else:
                    prompt = (
                        f"Please reply to my message in {language_name}. " + prompt
                    )

        # Record the message only after unpacking, so the history holds the
        # text that is actually sent and never a (text, language_code) tuple.
        self.chat.append({"role": self.user_role, "content": prompt})

        # NOTE(review): only the current prompt is sent; the history and the
        # initial message kept in self.chat are not forwarded to the API -
        # confirm whether that is intended.
        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=[
                {"role": self.user_role, "content": prompt},
            ],
            stream=self.stream,
        )

        if self.stream:
            generated_text, printable_text = "", ""
            for chunk in response:
                # Some chunks carry no choices at all (e.g. usage-only chunks).
                if not chunk.choices:
                    continue
                new_text = chunk.choices[0].delta.content or ""
                if not new_text:
                    continue
                generated_text += new_text
                printable_text += new_text
                sentences = sent_tokenize(printable_text)
                if len(sentences) > 1:
                    # Emit every completed sentence and keep exactly the text
                    # that follows them, so nothing is dropped or repeated
                    # when a chunk straddles a sentence boundary.
                    consumed = 0
                    for sentence in sentences[:-1]:
                        position = printable_text.find(sentence, consumed)
                        if position >= 0:
                            consumed = position + len(sentence)
                        yield sentence, language_code
                    printable_text = printable_text[consumed:]
            self.chat.append({"role": "assistant", "content": generated_text})
            # don't forget last sentence
            yield printable_text, language_code
        else:
            # content may be None (e.g. when the reply carries no text).
            generated_text = response.choices[0].message.content or ""
            self.chat.append({"role": "assistant", "content": generated_text})
            yield generated_text, language_code