in experiments/familiarity/familiarity.py [0:0]
import numpy as np


def get_per_comp_performance(grading_reports: list[dict]) -> dict:
    """
    Get the performance for each competition, averaged across the given grading reports.

    Returns a dict keyed by competition ID whose value is the average model performance,
    where performance is the model's score normalized between the sample-submission
    score (0.0) and the gold-medal threshold (1.0).
    """
    performance_dict = {}
    # sample_submissions_scores is assumed to be defined elsewhere in this module as an
    # iterable of (competition_id, sample_submission_score) pairs.
    for comp_id, sample_submission_score in sample_submissions_scores:
        if np.isnan(sample_submission_score):
            continue
        perfs = []
        for grading_report in grading_reports:
            for comp in grading_report["competition_reports"]:
                if comp["competition_id"] == comp_id:
                    if comp["score"] is None or np.isnan(comp["score"]):
                        performance = 0
                    else:
                        performance = (comp["score"] - sample_submission_score) / (
                            comp["gold_threshold"] - sample_submission_score
                        )
                    perfs.append(performance)
        if len(perfs) == 0:  # Skip competitions with no valid performance data
            continue
        performance_dict[comp_id] = np.mean(perfs)
    return performance_dict
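

# Minimal usage sketch (not part of the original module): it illustrates the data shapes
# the function expects, under the assumption that sample_submissions_scores is an iterable
# of (competition_id, score) pairs and that each grading report carries per-competition
# entries with "competition_id", "score", and "gold_threshold". The competition ID and all
# numbers below are hypothetical toy values.
if __name__ == "__main__":
    sample_submissions_scores = [("example-comp", 0.50)]
    example_reports = [
        {
            "competition_reports": [
                {"competition_id": "example-comp", "score": 0.75, "gold_threshold": 1.00}
            ]
        }
    ]
    # Performance = (0.75 - 0.50) / (1.00 - 0.50) = 0.5, i.e. halfway from the sample
    # submission to the gold-medal threshold.
    print(get_per_comp_performance(example_reports))  # value ~0.5 for "example-comp"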