utils/dataset_common.py (15 lines of code) (raw):
def convert_to_preference_format(dataset):
    """Turn preference rows into paired chosen/rejected conversations.

    Every item yielded by ``dataset`` is indexed with the keys ``"prompt"``,
    ``"chosen"`` and ``"rejected"``. For each item, one output dict is
    produced holding two conversations that share the same user prompt and
    differ only in the assistant reply.

    Args:
        dataset: Iterable of subscriptable rows providing the ``"prompt"``,
            ``"chosen"`` and ``"rejected"`` keys.

    Returns:
        A list with one dict per row, in iteration order. Each dict maps
        ``"chosen_conversations"`` and ``"rejected_conversations"`` to a
        two-element list of ``{"content": ..., "role": ...}`` messages
        (user message first, assistant message second).
    """

    def _build_conversation(row, reply_key):
        # One exchange: the prompt as the user turn, followed by the reply
        # stored under ``reply_key`` as the assistant turn.
        user_turn = {"content": row["prompt"], "role": "user"}
        assistant_turn = {"content": row[reply_key], "role": "assistant"}
        return [user_turn, assistant_turn]

    records = []
    for row in dataset:
        chosen_turns = _build_conversation(row, "chosen")
        rejected_turns = _build_conversation(row, "rejected")
        records.append(
            {
                "chosen_conversations": chosen_turns,
                "rejected_conversations": rejected_turns,
            }
        )
    return records