in src/alpaca_eval/decoders/openai.py
def _prompt_to_chatml(prompt: str, start_token: str = "<|im_start|>", end_token: str = "<|im_end|>"):
r"""Convert a text prompt to ChatML formal
Examples
--------
>>> prompt = (
... "<|im_start|>system\n"
... "You are a helpful assistant.\n<|im_end|>\n"
... "<|im_start|>system name=example_user\nKnock knock.\n<|im_end|>\n<|im_start|>system name=example_assistant\n"
... "Who's there?\n<|im_end|>\n<|im_start|>user\nOrange.\n<|im_end|>"
... )
>>> print(prompt)
<|im_start|>system
You are a helpful assistant.
<|im_end|>
<|im_start|>system name=example_user
Knock knock.
<|im_end|>
<|im_start|>system name=example_assistant
Who's there?
<|im_end|>
<|im_start|>user
Orange.
<|im_end|>
>>> _prompt_to_chatml(prompt)  # doctest: +NORMALIZE_WHITESPACE
[{'content': 'You are a helpful assistant.', 'role': 'system'},
{'content': 'Knock knock.', 'role': 'system', 'name': 'example_user'},
{'content': "Who's there?", 'role': 'system', 'name': 'example_assistant'},
{'content': 'Orange.', 'role': 'user'}]
"""
prompt = prompt.strip()
assert prompt.startswith(start_token)
assert prompt.endswith(end_token)
messages = []
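# Splitting on the start token gives one chunk per message (the leading empty chunk is skipped);
# each chunk starts with a role line, followed by the message content and the closing end token.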
for p in prompt.split(start_token)[1:]:
role, remainder = p.split("\n", 1)
role = role.strip()
content = remainder.split(end_token, 1)[0].strip()
if role.startswith("system") and role != "system":
# Per https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb
# and https://github.com/openai/openai-python/blob/main/chatml.md, a system line may carry extra
# key=value arguments (e.g. "system name=example_user"), which are parsed into a dict below.
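# `_string_to_dict` (defined or imported elsewhere in this file) is assumed to turn the trailing
# whitespace-separated key=value pairs into a dict, e.g. " name=example_user" -> {"name": "example_user"},
# which is what the doctest above relies on; an illustrative sketch follows this function.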
other_params = _string_to_dict(role.split("system", 1)[-1])
role = "system"
else:
other_params = dict()
messages.append(dict(content=content, role=role, **other_params))
return messages
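

# NOTE: the helper below is not part of the original file. It is a minimal, illustrative sketch,
# under the hypothetical name `_string_to_dict_sketch`, of the behavior assumed of `_string_to_dict`
# above, based only on the docstring example ("system name=example_user" -> name="example_user").
def _string_to_dict_sketch(to_convert: str) -> dict:
    """Parse whitespace-separated key=value pairs into a dict.

    >>> _string_to_dict_sketch(" name=example_user")
    {'name': 'example_user'}
    """
    return dict(pair.split("=", 1) for pair in to_convert.split() if "=" in pair)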