in scripts/adapet/ADAPET/utilcode.py [0:0]
def write_evaluation_json(accs, mics, macs, avg_pres, logit_aps, num_labs, sample_size, task_name, configs, english, prompt):
    """Aggregate per-seed evaluation metrics and write them to a JSON file.

    For each metric list the mean and the (NumPy default, population)
    standard deviation are computed and rounded to 10 decimal places. The
    summary is written as a single JSON object followed by a newline to::

        results/<pretrained_weight>/<task><_eng|_lang><_prompt|_no_prompt>/<sample_size>_split_results.json

    where ``<task>`` is ``task_name.lower()`` with its first 7 characters
    removed. The directory is created if it does not exist and an existing
    results file is overwritten.

    Args:
        accs: Per-seed accuracy values.
        mics: Per-seed values reported under the ``f1_mic`` keys.
        macs: Per-seed values reported under the ``f1_mac`` keys.
        avg_pres: Per-seed average-precision values. Only read when
            ``num_labs <= 2``.
        logit_aps: Per-seed values reported under the ``logit_ap`` keys
            (presumably average precision computed from logits; confirm
            with the caller). Only read when ``num_labs <= 2``.
        num_labs: Number of labels. With more than two labels the
            average-precision entries are written as the string ``'NA'``.
        sample_size: Training-split size. ``"full"`` and ``500`` are
            validated against ``ADAPET_SEEDS``, everything else against
            ``SEEDS``. Also used in the output file name.
        task_name: Task identifier; lowercased and stripped of its first
            7 characters to form the directory name.
        configs: Mapping that provides ``"pretrained_weight"``.
        english: Truthy selects the ``_eng`` directory suffix, falsy
            selects ``_lang``.
        prompt: Truthy selects the ``_prompt`` directory suffix, falsy
            selects ``_no_prompt``.

    Raises:
        AssertionError: If any metric list does not hold exactly one value
            per expected seed.
    """
    expected_seeds = ADAPET_SEEDS if sample_size in ["full", 500] else SEEDS
    lengths = {
        "accs": len(accs),
        "mics": len(mics),
        "macs": len(macs),
        "avg_pres": len(avg_pres),
        "logit_aps": len(logit_aps),
    }
    # Explicit check instead of a bare `assert` so the validation is not
    # stripped under `python -O`. AssertionError is kept as the exception
    # type so existing callers observe the same failure mode.
    if any(n != len(expected_seeds) for n in lengths.values()):
        raise AssertionError(
            "expected %d values per metric (one per seed) for sample_size=%r, got %r"
            % (len(expected_seeds), sample_size, lengths)
        )

    round_to = 10

    def _mean_and_std(values):
        # Rounded mean and standard deviation of one metric across seeds.
        return round(np.mean(values), round_to), round(np.std(values), round_to)

    mean_acc, acc_std = _mean_and_std(accs)
    mean_micro, micro_std = _mean_and_std(mics)
    mean_macro, macro_std = _mean_and_std(macs)

    if num_labs > 2:
        # In the multiclass scenario average precision is not defined, so
        # the inputs are not touched at all (they may hold placeholders).
        mean_avg_pre = avg_pre_std = 'NA'
        mean_logit_ap = logit_ap_std = 'NA'
    else:
        mean_avg_pre, avg_pre_std = _mean_and_std(avg_pres)
        mean_logit_ap, logit_ap_std = _mean_and_std(logit_aps)

    json_dict = {
        "mean_acc": mean_acc,
        "acc_std": acc_std,
        "mean_f1_mic": mean_micro,
        "f1_mic_std": micro_std,
        "mean_f1_mac": mean_macro,
        "f1_mac_std": macro_std,
        "mean_avg_pre": mean_avg_pre,
        "avg_pre_std": avg_pre_std,
        "mean_logit_ap": mean_logit_ap,
        "logit_ap_std": logit_ap_std,
    }

    # NOTE(review): the [7:] slice drops a fixed-width prefix from the task
    # name; which prefix that is cannot be seen from here - confirm.
    # Plain concatenation is kept on purpose: os.path.join would discard the
    # 'results' root if the pretrained weight were an absolute path.
    write_dir = 'results/' + configs["pretrained_weight"] + '/' + task_name.lower()[7:]
    write_dir += '_eng' if english else '_lang'
    write_dir += '_prompt' if prompt else '_no_prompt'

    # exist_ok avoids the check-then-create race when several evaluation
    # runs try to create the same directory concurrently.
    os.makedirs(write_dir, exist_ok=True)

    writefile = write_dir + "/" + str(sample_size) + "_split_results.json"
    with open(writefile, "w") as f:
        f.write(json.dumps(json_dict) + "\n")