in fastchat/llm_judge/common.py [0:0]
def play_a_match_single(match: MatchPair, output_file: str):
question, model, answer, judge, ref_answer, multi_turn = (
match.question,
match.model,
match.answer,
match.judge,
match.ref_answer,
match.multi_turn,
)
if judge.prompt_template["type"] == "single":
score, user_prompt, judgment = run_judge_single(
question, answer, judge, ref_answer, multi_turn=multi_turn
)
question_id = question["question_id"]
turn = 1 if not multi_turn else 2
result = {
"question_id": question_id,
"model": model,
"judge": (judge.model_name, judge.prompt_template["name"]),
"user_prompt": user_prompt,
"judgment": judgment,
"score": score,
"turn": turn,
"tstamp": time.time(),
}
print(
f"question: {question_id}, turn: {turn}, model: {model}, "
f"score: {score}, "
f"judge: {(judge.model_name, judge.prompt_template['name'])}"
)
else:
raise ValueError(f"invalid judge type: {judge['type']}")
if output_file:
os.makedirs(os.path.dirname(output_file), exist_ok=True)
with open(output_file, "a") as fout:
fout.write(json.dumps(result) + "\n")
return result