in seed/util/common_utils.py [0:0]
def _turn_to_messages(turn):
    """Build the user/assistant message pair for one question-answer turn.

    Args:
        turn: either a dict with "QUESTION" and "ANSWER" keys, or an
            indexable pair whose item 0 is the question and item 1 the answer.
    Returns:
        A two-element list: the user message followed by the assistant message.
    """
    if isinstance(turn, dict):
        question, answer = turn["QUESTION"], turn["ANSWER"]
    else:
        question, answer = turn[0], turn[1]
    return [
        {"role": "user", "content": question},
        {"role": "assistant", "content": answer},
    ]


def convert_to_oai_format(qa_pair, system_prompt_msg="You're an AI assistant that guides a user to the location of your CS center"):
    """
    Convert QA pairs to the chat "messages" samples used for OpenAI jsonl data.

    Each element of ``qa_pair`` becomes one sample of the form
    ``{"messages": [system, user, assistant, ...]}``. An element may be:
      * a dict with "QUESTION" and "ANSWER" keys (single turn),
      * a pair such as ``("question", "answer")`` or ``["question", "answer"]``
        (single turn),
      * a list whose elements are all dicts or pairs (multi-turn conversation,
        one user/assistant exchange per element).

    Args:
        qa_pair: list of QA items as described above
        system_prompt_msg: content of the system message that starts every sample
    Returns:
        formatted_data: list of ``{"messages": [...]}`` dicts in random order
        (shuffled with ``random.shuffle``), or None when ``qa_pair`` is not a
        list (a message is printed in that case).
    """
    if not isinstance(qa_pair, list):
        print("Argument is not a list")
        return None

    formatted_data = []
    for qa in qa_pair:
        # A list is a multi-turn conversation only when every element is itself
        # a turn. A flat list like ["question", "answer"] is a single pair;
        # iterating it per element would index into the strings and emit
        # single characters as message contents.
        is_multi_turn = isinstance(qa, list) and all(
            isinstance(turn, (dict, list, tuple)) for turn in qa
        )
        turns = qa if is_multi_turn else [qa]

        sample = [{"role": "system", "content": system_prompt_msg}]
        for turn in turns:
            sample.extend(_turn_to_messages(turn))
        formatted_data.append({"messages": sample})

    # Shuffle in place so the samples are not emitted in input order.
    random.shuffle(formatted_data)
    return formatted_data