def convert_to_oai_format()

in seed/util/common_utils.py [0:0]


def _qa_turn_to_messages(turn):
    """Build the user/assistant message pair for one question/answer turn.

    A dict turn is read through its "QUESTION" and "ANSWER" keys; any other
    turn is treated as a sequence with the question at index 0 and the
    answer at index 1.
    """
    if isinstance(turn, dict):
        question, answer = turn["QUESTION"], turn["ANSWER"]
    else:
        # Index explicitly (instead of unpacking) so sequences longer than
        # two items keep working exactly as before.
        question, answer = turn[0], turn[1]
    return [
        {"role": "user", "content": question},
        {"role": "assistant", "content": answer},
    ]


def _is_flat_qa_pair(item):
    """Return True when *item* is a ``[question, answer]`` list of two strings."""
    return (
        isinstance(item, list)
        and len(item) == 2
        and all(isinstance(part, str) for part in item)
    )


def convert_to_oai_format(qa_pair, system_prompt_msg="You're an AI assistant that guides a user to the location of your CS center"):
    """
    Convert QA pairs to the chat "messages" records used for OpenAI jsonl files.

    Each element of ``qa_pair`` becomes one record of the form
    ``{"messages": [system, user, assistant, ...]}``. An element may be:

    * a dict with "QUESTION" and "ANSWER" keys (single turn),
    * a tuple, or a list of exactly two strings, holding
      ``(question, answer)`` (single turn),
    * any other list, whose items are themselves turns (dicts or
      ``(question, answer)`` sequences) forming a multi-turn conversation.

    Args:
        qa_pair: list of QA elements as described above
        system_prompt_msg: content of the system message that starts every record

    Returns:
        formatted_data: list of ``{"messages": [...]}`` dicts in random order
            (shuffled with ``random.shuffle``, so the module must import
            ``random``), or ``None`` when ``qa_pair`` is not a list.
    """
    if not isinstance(qa_pair, list):
        print("Argument is not a list")
        return None

    formatted_data = []
    for qa in qa_pair:
        sample = [{"role": "system", "content": system_prompt_msg}]

        # A flat ["question", "answer"] list is a single turn. Iterating it as
        # a conversation would index into the strings themselves and emit
        # single characters as message contents.
        if isinstance(qa, list) and not _is_flat_qa_pair(qa):  # multi-turn
            for turn in qa:
                sample.extend(_qa_turn_to_messages(turn))
        else:  # single turn
            sample.extend(_qa_turn_to_messages(qa))

        formatted_data.append({"messages": sample})

    # Shuffle so the resulting records are not in input order.
    random.shuffle(formatted_data)
    return formatted_data