in leaderboard/plots.py [0:0]
def rank_correlation_table(filetypes: List[str], commit: bool = False, include_test: bool = True):
    """Print a LaTeX table of Kendall rank correlations between EM and IRT skill.

    For every subject returned by ``load_squad_submissions`` a row is built
    holding its dev/test exact-match scores and its dev skill from the 3PL IRT
    parameters. When ``include_test`` is true, the subject is mapped to its
    test id through the ``dev_to_test`` mapping and, if test IRT parameters
    exist for that id, the test skill is added as well. Rows with any missing
    value are dropped before the correlations are computed.

    Side effects: logs progress to ``console``, pickles the correlation matrix
    to ``/tmp/leaderboard_correlations.pickle`` and prints the LaTeX table to
    stdout.

    Args:
        filetypes: Not used by this function body.
        commit: Not used by this function body.
        include_test: Whether to load test-set IRT parameters and add a
            ``test_skill`` column.
    """
    irt_model = "3PL"
    dev_irt_params = IrtParsed.from_irt_file(
        Path(conf["irt"]["squad"]["dev"]["pyro"][irt_model]["full"]) / "parameters.json"
    )
    dev_predictions = LeaderboardPredictions.parse_file(
        conf["squad"]["submission_predictions"]["dev"]
    )
    dev_id_to_subject = load_squad_submissions(dev_predictions)
    console.log("N Dev IRT", len(dev_irt_params.model_stats))
    # NOTE(review): the tournament results are not consumed by this table; the
    # call is kept in case it has side effects -- confirm and remove if it is
    # pure, since it may be expensive.
    run_stats_tournament("dev")

    dev_to_test = None
    test_irt_params = None
    if include_test:
        dev_to_test = read_json(conf["squad"]["dev_to_test"])["dev_to_test"]
        test_irt_params = IrtParsed.from_irt_file(
            Path(conf["irt"]["squad"]["test"]["pyro"][irt_model]["full"]) / "parameters.json"
        )
        console.log("N Test IRT", len(test_irt_params.model_stats))
        # NOTE(review): same as the dev tournament above -- result unused here.
        run_stats_tournament("test")

    rows = []
    n_test = 0
    for subject_id, subject in dev_id_to_subject.items():
        entry = {
            "subject_id": subject_id,
            "name": subject["name"],
            "dev_em": subject["dev_em"],
            "test_em": subject["test_em"],
            "dev_skill": dev_irt_params.model_stats[subject_id].skill,
        }
        if include_test and subject_id in dev_to_test:
            test_subject_id = dev_to_test[subject_id]
            if test_subject_id in test_irt_params.model_stats:
                entry["test_skill"] = test_irt_params.model_stats[test_subject_id].skill
                n_test += 1
        rows.append(entry)
    n_dev = len(rows)
    console.log("N Dev", n_dev, "N Test", n_test)

    # Subjects without a test skill end up with NaN in that column and are
    # dropped here, so every correlation is computed over the same subjects.
    df = pd.DataFrame(rows).dropna(axis=0)
    console.log(df)
    name_mapping = {
        "dev_em": r"EM$_{\text{dev}}$",
        "test_em": r"EM$_{\text{test}}$",
        "dev_skill": r"Ability$_{\text{dev}}$",
        "test_skill": r"Ability$_{\text{test}}$",
    }
    # Correlate numeric columns only: the identifier/name columns are strings,
    # and recent pandas versions raise on them instead of silently skipping.
    numeric_df = df.select_dtypes(include=["number", "bool"])
    correlations = numeric_df.corr(method="kendall")
    correlations.to_pickle("/tmp/leaderboard_correlations.pickle")
    console.log(correlations)
    print(
        correlations.applymap(lambda n: f"${to_precision(n, 3)}$")
        .rename(columns=name_mapping, index=name_mapping)
        # One "l" for the index plus one "r" per column actually in the table
        # (there is no test_skill column when include_test is False).
        .to_latex(column_format="l" + len(correlations.columns) * "r", escape=False)
    )