in DianJin-R1/src/evaluate/eval.py [0:0]
def eval_cflue(path):
    """Score boxed multiple-choice predictions from a JSON file and print a report.

    The file at ``path`` is parsed as JSON and iterated as a sequence of
    records. Each record is read through these keys:

    * ``"answer"``: the reference answer, compared for equality against the
      reconstructed prediction string.
    * ``"output"``: the model's text, or ``None`` when there is no output.
    * ``"instruction"``: only printed when the output has no boxed answer.

    The prediction is taken from the last ``\\boxed{...}`` span in the output.
    Every letter of ``"ABCDEFG"`` occurring in that span is collected in
    alphabetical order, so ``\\boxed{C, B}`` yields ``"BC"``.

    Records whose output is ``None`` or has no boxed span are reported and
    counted as incorrect: they stay in the denominator of the accuracy.

    Args:
        path: Path of the JSON file to evaluate.

    Returns:
        None. Diagnostics, the number of correct records, the total number of
        records and the accuracy are printed to stdout. An empty dataset
        reports an accuracy of ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    # Compiled once, outside the loop. DOTALL lets a boxed span run across
    # line breaks in the model output.
    box_re = re.compile(r"\\boxed\{(.*?)\}", re.DOTALL)
    choices = "ABCDEFG"
    banner = "=" * 20

    # Explicit UTF-8 keeps decoding independent of the platform locale, and
    # the context manager closes the handle deterministically.
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    cnt = 0
    for item in data:
        # Reference answer
        answer = item["answer"]
        # Model prediction
        output = item["output"]

        if output is None:
            print(banner + " None " + banner)
            continue

        matches = box_re.findall(output)
        if not matches:
            print(banner + " Wrong Format " + banner)
            print(item["instruction"])
            print(answer)
            continue

        # Only the last boxed span counts; earlier ones are ignored.
        boxed = matches[-1]
        # Iterating over `choices` (not over `boxed`) normalises the letter
        # order and drops duplicates and separators.
        pred = "".join(c for c in choices if c in boxed)

        if answer == pred:
            cnt += 1
        else:
            print(banner + " Wrong Answer " + banner)
            print(pred)
            print(answer)

    total = len(data)
    print(banner + " Accuracy " + banner)
    print(cnt)
    print(total)
    # Guard the division so an empty dataset still produces a full report.
    print(cnt / total if total else 0.0)