# lmms_eval/tasks/hallusion_bench/utils.py
def get_eval_pair_all(data, model_correctness_entry): # per question pair
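    """Aggregate accuracy per question pair.

    Rows that share (category, subcategory, set_id, question_id) form one
    pair; the pair counts as correct only when every row in it is correct.
    """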
    orig_correctness = dict()
    counter = 0
    lh_counter = 0
    vi_counter = 0
    both_counter = 0
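
    # NOTE: lh_counter / vi_counter / both_counter are never incremented in this
    # version, so the LH_cg / VI_cg / Mix_cg stats below always report 0.

    # Record correctness of each original question (figure_id == "0"); unused
    # here since the analysis that consumed it is commented out further down.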
    for r in data:
        if str(r["figure_id"]) == "0":
            key = "_".join([r["category"], r["subcategory"], str(r["set_id"]), str(r["question_id"])])
            orig_correctness[key] = r[model_correctness_entry]
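
    # Tally (num_correct, num_questions) for every pair key across all rows.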
    get_eval_pair_dict = dict()
    for r in data:
        name = "_".join([r["category"], r["subcategory"], str(r["set_id"]), str(r["question_id"])])
        if name in get_eval_pair_dict:
            c, t = get_eval_pair_dict[name]
            get_eval_pair_dict[name] = (c + r["correct"], t + 1)
        else:
            get_eval_pair_dict[name] = (r["correct"], 1)
        counter += 1
    eval_all_pair_stat = {}
    eval_all_pair_stat["note"] = "all accuracy per question pair"
    eval_all_pair_stat["total"] = len(get_eval_pair_dict.keys())
    eval_all_pair_stat["total_q"] = counter
    eval_all_pair_stat["correct"] = 0
    eval_all_pair_stat["wrong"] = 0
    eval_all_pair_stat["LH"] = 0
    eval_all_pair_stat["VI"] = 0
    eval_all_pair_stat["Mix"] = 0
    eval_all_pair_stat["LH_cg"] = lh_counter
    eval_all_pair_stat["VI_cg"] = vi_counter
    eval_all_pair_stat["Mix_cg"] = both_counter
    # for v in get_eval_pair_dict.values():
    #     if v[0] == v[1]:
    #         eval_all_pair_stat["correct"] += 1
    #     else:
    #         eval_all_pair_stat["wrong"] += 1
    # for v in get_analysis_pair_dict.values():
    #     if v[0] > 0 and v[1] > 0:
    #         eval_all_pair_stat["Mix"] += 1
    #     elif v[0] > 0:
    #         eval_all_pair_stat["LH"] += 1
    #     elif v[1] > 0:
    #         eval_all_pair_stat["VI"] += 1
    for v in get_eval_pair_dict.values():
        if v[0] == v[1]:
            eval_all_pair_stat["correct"] += 1
        else:
            eval_all_pair_stat["wrong"] += 1
    return eval_all_pair_stat
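

# Minimal usage sketch, assuming hypothetical result rows; in lmms-eval the
# rows come from HallusionBench model outputs, and `model_correctness_entry`
# names whichever field holds the judged correctness ("model_correct" below
# is a made-up field name for illustration).
if __name__ == "__main__":
    rows = [
        {"category": "VD", "subcategory": "illusion", "set_id": 0, "question_id": 0,
         "figure_id": 0, "correct": 1, "model_correct": 1},
        {"category": "VD", "subcategory": "illusion", "set_id": 0, "question_id": 0,
         "figure_id": 1, "correct": 0, "model_correct": 0},
    ]
    stats = get_eval_pair_all(rows, "model_correct")
    # Both rows share one pair key, so one wrong answer marks the pair wrong:
    # total=1, total_q=2, correct=0, wrong=1
    print(stats)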