lm_eval/tasks/leaderboard/math/_leaderboard_math.yaml (13 lines of code) (raw):

# Group definition that bundles the seven "hard" math subtasks under one name
# so they can be selected and reported together as `leaderboard_math_hard`.
group: leaderboard_math_hard

# Member tasks of the group. Each entry is the name of a task that is
# configured elsewhere (not in this file).
task:
  - leaderboard_math_algebra_hard
  - leaderboard_math_counting_and_prob_hard
  - leaderboard_math_geometry_hard
  - leaderboard_math_intermediate_algebra_hard
  - leaderboard_math_num_theory_hard
  - leaderboard_math_prealgebra_hard
  - leaderboard_math_precalculus_hard

# How the members' scores are combined into a single group-level score.
aggregate_metric_list:
  - metric: exact_match
    aggregation: mean
    # NOTE(review): presumably weights each subtask's score by its number of
    # examples when taking the mean (rather than averaging subtasks equally)
    # -- confirm against the harness's group-config documentation.
    weight_by_size: true