def eval_cflue()

in DianJin-R1/src/evaluate/eval.py [0:0]


def eval_cflue(path):
    """Score boxed multiple-choice predictions from a JSON file and print accuracy.

    The file at ``path`` is loaded as JSON and iterated; every record is
    indexed by ``"answer"`` (gold label) and ``"output"`` (model response),
    and by ``"instruction"`` when the response has no ``\\boxed{...}`` span.

    For each record the content of the *last* ``\\boxed{...}`` span in the
    output is scanned for the letters ``A``-``G``; the letters found are
    concatenated in alphabetical order and compared to the gold answer for
    exact equality. Records whose output is ``None`` or has no boxed span are
    reported and counted as incorrect (they remain in the denominator).

    Diagnostics and the final summary (correct count, total, accuracy) are
    printed to stdout; nothing is returned.

    Args:
        path: Path of the JSON file holding the prediction records.
    """
    # Compile once instead of handing the pattern string to `re` per record.
    box_pattern = re.compile(r"\\boxed\{(.*?)\}", re.DOTALL)
    choices = "ABCDEFG"

    # JSON text is UTF-8; decode explicitly rather than relying on the locale,
    # and use a context manager so the handle is always closed.
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    cnt = 0
    for item in data:
        # Gold answer.
        answer = item["answer"]
        # Model prediction (raw response text).
        output = item["output"]
        if output is None:
            print("=" * 20 + " None " + "=" * 20)
            continue

        matches = box_pattern.findall(output)
        if not matches:
            print("=" * 20 + " Wrong Format " + "=" * 20)
            print(item["instruction"])
            print(answer)
            continue

        # Only the last boxed span counts; iterating over `choices` (not the
        # span) yields the selected letters in canonical A..G order.
        pred = "".join(c for c in choices if c in matches[-1])
        if answer == pred:
            cnt += 1
        else:
            print("=" * 20 + " Wrong Answer " + "=" * 20)
            print(pred)
            print(answer)

    total = len(data)
    print("=" * 20 + " Accuracy " + "=" * 20)
    print(cnt)
    print(total)
    # An empty dataset has no meaningful accuracy; report 0.0 instead of
    # raising ZeroDivisionError after the counts were already printed.
    print(cnt / total if total else 0.0)